Diffstat (limited to 'arm_compute/core')
-rw-r--r--  arm_compute/core/AccessWindowAutoPadding.h | 85
-rw-r--r--  arm_compute/core/AccessWindowStatic.h | 101
-rw-r--r--  arm_compute/core/AccessWindowTranspose.h | 48
-rw-r--r--  arm_compute/core/CL/CLCompileContext.h | 46
-rw-r--r--  arm_compute/core/CL/CLCoreRuntimeContext.h | 75
-rw-r--r--  arm_compute/core/CL/CLDevice.h | 39
-rw-r--r--  arm_compute/core/CL/CLHelpers.h | 98
-rw-r--r--  arm_compute/core/CL/CLKernelLibrary.h | 16
-rw-r--r--  arm_compute/core/CL/CLKernels.h | 165
-rw-r--r--  arm_compute/core/CL/CLTypes.h | 22
-rw-r--r--  arm_compute/core/CL/CLValidate.h | 61
-rw-r--r--  arm_compute/core/CL/ICLArray.h | 17
-rw-r--r--  arm_compute/core/CL/ICLDistribution1D.h | 102
-rw-r--r--  arm_compute/core/CL/ICLGEMMKernelConfiguration.h | 68
-rw-r--r--  arm_compute/core/CL/ICLHOG.h | 113
-rw-r--r--  arm_compute/core/CL/ICLKernel.h | 387
-rw-r--r--  arm_compute/core/CL/ICLLut.h | 94
-rw-r--r--  arm_compute/core/CL/ICLMultiHOG.h | 56
-rw-r--r--  arm_compute/core/CL/ICLMultiImage.h | 59
-rw-r--r--  arm_compute/core/CL/ICLSimple2DKernel.h | 41
-rw-r--r--  arm_compute/core/CL/ICLSimple3DKernel.h | 43
-rw-r--r--  arm_compute/core/CL/ICLSimpleKernel.h | 66
-rw-r--r--  arm_compute/core/CL/ICLTensor.h | 11
-rw-r--r--  arm_compute/core/CL/OpenCL.h | 43
-rw-r--r--  arm_compute/core/CL/gemm/CLGEMMHelpers.h | 53
-rw-r--r--  arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h | 65
-rw-r--r--  arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h | 56
-rw-r--r--  arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h | 51
-rw-r--r--  arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h | 53
-rw-r--r--  arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h | 63
-rw-r--r--  arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h | 56
-rw-r--r--  arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h | 53
-rw-r--r--  arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h | 63
-rw-r--r--  arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h | 59
-rw-r--r--  arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h | 53
-rw-r--r--  arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h | 79
-rw-r--r--  arm_compute/core/CL/kernels/CLAccumulateKernel.h | 114
-rw-r--r--  arm_compute/core/CL/kernels/CLActivationLayerKernel.h | 89
-rw-r--r--  arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h | 106
-rw-r--r--  arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h | 95
-rw-r--r--  arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h | 120
-rw-r--r--  arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h | 111
-rw-r--r--  arm_compute/core/CL/kernels/CLBitwiseAndKernel.h | 76
-rw-r--r--  arm_compute/core/CL/kernels/CLBitwiseNotKernel.h | 56
-rw-r--r--  arm_compute/core/CL/kernels/CLBitwiseOrKernel.h | 76
-rw-r--r--  arm_compute/core/CL/kernels/CLBitwiseXorKernel.h | 76
-rw-r--r--  arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h | 99
-rw-r--r--  arm_compute/core/CL/kernels/CLBox3x3Kernel.h | 59
-rw-r--r--  arm_compute/core/CL/kernels/CLCannyEdgeKernel.h | 187
-rw-r--r--  arm_compute/core/CL/kernels/CLChannelCombineKernel.h | 102
-rw-r--r--  arm_compute/core/CL/kernels/CLChannelExtractKernel.h | 95
-rw-r--r--  arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h | 82
-rw-r--r--  arm_compute/core/CL/kernels/CLCol2ImKernel.h | 106
-rw-r--r--  arm_compute/core/CL/kernels/CLColorConvertKernel.h | 121
-rw-r--r--  arm_compute/core/CL/kernels/CLComparisonKernel.h | 89
-rw-r--r--  arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLConvolutionKernel.h | 224
-rw-r--r--  arm_compute/core/CL/kernels/CLCopyKernel.h | 86
-rw-r--r--  arm_compute/core/CL/kernels/CLCropKernel.h | 103
-rw-r--r--  arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h | 86
-rw-r--r--  arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h | 106
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h | 95
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h | 91
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h | 84
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h | 114
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 114
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h | 131
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h | 85
-rw-r--r--  arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h | 79
-rw-r--r--  arm_compute/core/CL/kernels/CLDerivativeKernel.h | 83
-rw-r--r--  arm_compute/core/CL/kernels/CLDilateKernel.h | 59
-rw-r--r--  arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h | 125
-rw-r--r--  arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h | 68
-rw-r--r--  arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h | 224
-rw-r--r--  arm_compute/core/CL/kernels/CLErodeKernel.h | 59
-rw-r--r--  arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h | 89
-rw-r--r--  arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h | 97
-rw-r--r--  arm_compute/core/CL/kernels/CLFFTScaleKernel.h | 86
-rw-r--r--  arm_compute/core/CL/kernels/CLFastCornersKernel.h | 133
-rw-r--r--  arm_compute/core/CL/kernels/CLFillBorderKernel.h | 86
-rw-r--r--  arm_compute/core/CL/kernels/CLFlattenLayerKernel.h | 83
-rw-r--r--  arm_compute/core/CL/kernels/CLFloorKernel.h | 81
-rw-r--r--  arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h | 126
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h | 108
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h | 123
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h | 155
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h | 116
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h | 135
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h | 101
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h | 103
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h | 108
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 112
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h | 112
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h | 176
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h | 78
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 122
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h | 127
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h | 150
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h | 130
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 84
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h | 105
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h | 100
-rw-r--r--  arm_compute/core/CL/kernels/CLGatherKernel.h | 89
-rw-r--r--  arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h | 59
-rw-r--r--  arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h | 83
-rw-r--r--  arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h | 111
-rw-r--r--  arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h | 85
-rw-r--r--  arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h | 122
-rw-r--r--  arm_compute/core/CL/kernels/CLHOGDetectorKernel.h | 96
-rw-r--r--  arm_compute/core/CL/kernels/CLHarrisCornersKernel.h | 100
-rw-r--r--  arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLHistogramKernel.h | 111
-rw-r--r--  arm_compute/core/CL/kernels/CLIm2ColKernel.h | 136
-rw-r--r--  arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLIntegralImageKernel.h | 86
-rw-r--r--  arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h | 100
-rw-r--r--  arm_compute/core/CL/kernels/CLLKTrackerKernel.h | 240
-rw-r--r--  arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h | 85
-rw-r--r--  arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLMeanStdDevKernel.h | 98
-rw-r--r--  arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLMedian3x3Kernel.h | 59
-rw-r--r--  arm_compute/core/CL/kernels/CLMemsetKernel.h | 85
-rw-r--r--  arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h | 87
-rw-r--r--  arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h | 124
-rw-r--r--  arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h | 77
-rw-r--r--  arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 60
-rw-r--r--  arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h | 94
-rw-r--r--  arm_compute/core/CL/kernels/CLPadLayerKernel.h | 97
-rw-r--r--  arm_compute/core/CL/kernels/CLPermuteKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h | 170
-rw-r--r--  arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 96
-rw-r--r--  arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h | 99
-rw-r--r--  arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h | 88
-rw-r--r--  arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h | 86
-rw-r--r--  arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h | 110
-rw-r--r--  arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h | 92
-rw-r--r--  arm_compute/core/CL/kernels/CLRangeKernel.h | 92
-rw-r--r--  arm_compute/core/CL/kernels/CLReductionOperationKernel.h | 99
-rw-r--r--  arm_compute/core/CL/kernels/CLRemapKernel.h | 81
-rw-r--r--  arm_compute/core/CL/kernels/CLReorgLayerKernel.h | 90
-rw-r--r--  arm_compute/core/CL/kernels/CLReshapeLayerKernel.h | 81
-rw-r--r--  arm_compute/core/CL/kernels/CLReverseKernel.h | 84
-rw-r--r--  arm_compute/core/CL/kernels/CLScaleKernel.h | 98
-rw-r--r--  arm_compute/core/CL/kernels/CLScharr3x3Kernel.h | 97
-rw-r--r--  arm_compute/core/CL/kernels/CLSelectKernel.h | 94
-rw-r--r--  arm_compute/core/CL/kernels/CLSobel3x3Kernel.h | 83
-rw-r--r--  arm_compute/core/CL/kernels/CLSobel5x5Kernel.h | 139
-rw-r--r--  arm_compute/core/CL/kernels/CLSobel7x7Kernel.h | 139
-rw-r--r--  arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 241
-rw-r--r--  arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h | 121
-rw-r--r--  arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h | 84
-rw-r--r--  arm_compute/core/CL/kernels/CLStackLayerKernel.h | 101
-rw-r--r--  arm_compute/core/CL/kernels/CLStridedSliceKernel.h | 115
-rw-r--r--  arm_compute/core/CL/kernels/CLTableLookupKernel.h | 55
-rw-r--r--  arm_compute/core/CL/kernels/CLThresholdKernel.h | 69
-rw-r--r--  arm_compute/core/CL/kernels/CLTileKernel.h | 88
-rw-r--r--  arm_compute/core/CL/kernels/CLTransposeKernel.h | 64
-rw-r--r--  arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h | 89
-rw-r--r--  arm_compute/core/CL/kernels/CLWarpAffineKernel.h | 62
-rw-r--r--  arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h | 59
-rw-r--r--  arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h | 121
-rw-r--r--  arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h | 87
-rw-r--r--  arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h | 95
-rw-r--r--  arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h | 89
-rw-r--r--  arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h | 115
-rw-r--r--  arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h | 121
-rw-r--r--  arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h | 127
-rw-r--r--  arm_compute/core/CL/kernels/CLYOLOLayerKernel.h | 98
-rw-r--r--  arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h | 105
-rw-r--r--  arm_compute/core/CPP/CPPKernels.h | 5
-rw-r--r--  arm_compute/core/CPP/CPPTypes.h | 261
-rw-r--r--  arm_compute/core/CPP/ICPPKernel.h | 41
-rw-r--r--  arm_compute/core/CPP/ICPPSimpleKernel.h | 76
-rw-r--r--  arm_compute/core/CPP/Validate.h | 117
-rw-r--r--  arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h | 26
-rw-r--r--  arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h | 80
-rw-r--r--  arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h | 75
-rw-r--r--  arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h | 20
-rw-r--r--  arm_compute/core/CPP/kernels/CPPPermuteKernel.h | 10
-rw-r--r--  arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h | 73
-rw-r--r--  arm_compute/core/CPP/kernels/CPPTopKVKernel.h | 9
-rw-r--r--  arm_compute/core/CPP/kernels/CPPUpsampleKernel.h | 6
-rw-r--r--  arm_compute/core/Coordinates.h | 7
-rw-r--r--  arm_compute/core/CoreTypes.h | 352
-rw-r--r--  arm_compute/core/Dimensions.h | 44
-rw-r--r--  arm_compute/core/Error.h | 145
-rw-r--r--  arm_compute/core/GLES_COMPUTE/GCCoreRuntimeContext.h | 63
-rw-r--r--  arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h | 271
-rw-r--r--  arm_compute/core/GLES_COMPUTE/GCKernels.h | 54
-rw-r--r--  arm_compute/core/GLES_COMPUTE/IGCKernel.h | 165
-rw-r--r--  arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h | 41
-rw-r--r--  arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h | 43
-rw-r--r--  arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h | 66
-rw-r--r--  arm_compute/core/GLES_COMPUTE/IGCTensor.h | 113
-rw-r--r--  arm_compute/core/GLES_COMPUTE/OpenGLES.h | 165
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h | 71
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h | 74
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h | 82
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h | 98
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h | 92
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h | 74
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h | 77
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h | 92
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h | 79
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h | 77
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h | 80
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h | 64
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h | 70
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h | 92
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h | 67
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h | 128
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h | 72
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h | 84
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h | 70
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h | 85
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h | 54
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h | 109
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h | 84
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h | 52
-rw-r--r--  arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h | 90
-rw-r--r--  arm_compute/core/GPUTarget.h | 61
-rw-r--r--  arm_compute/core/HOGInfo.h | 152
-rw-r--r--  arm_compute/core/Helpers.h | 682
-rw-r--r--  arm_compute/core/Helpers.inl | 268
-rw-r--r--  arm_compute/core/IAccessWindow.h | 17
-rw-r--r--  arm_compute/core/IArray.h | 26
-rw-r--r--  arm_compute/core/IDevice.h | 3
-rw-r--r--  arm_compute/core/IDistribution.h | 59
-rw-r--r--  arm_compute/core/IDistribution1D.h | 84
-rw-r--r--  arm_compute/core/IHOG.h | 54
-rw-r--r--  arm_compute/core/IKernel.h | 9
-rw-r--r--  arm_compute/core/ILut.h | 69
-rw-r--r--  arm_compute/core/IMultiHOG.h | 61
-rw-r--r--  arm_compute/core/IMultiImage.h | 60
-rw-r--r--  arm_compute/core/IPyramid.h | 56
-rw-r--r--  arm_compute/core/ITensor.h | 10
-rw-r--r--  arm_compute/core/ITensorInfo.h | 103
-rw-r--r--  arm_compute/core/ITensorPack.h | 116
-rw-r--r--  arm_compute/core/KernelDescriptors.h | 188
-rw-r--r--  arm_compute/core/Log.h | 18
-rw-r--r--  arm_compute/core/MultiImageInfo.h | 66
-rw-r--r--  arm_compute/core/NEON/INESimpleKernel.h | 34
-rw-r--r--  arm_compute/core/NEON/NEAsymm.h | 760
-rw-r--r--  arm_compute/core/NEON/NEAsymm.inl | 92
-rw-r--r--  arm_compute/core/NEON/NEColorConvertHelper.inl | 1045
-rw-r--r--  arm_compute/core/NEON/NEFixedPoint.inl | 43
-rw-r--r--  arm_compute/core/NEON/NEKernels.h | 155
-rw-r--r--  arm_compute/core/NEON/NEMath.h | 307
-rw-r--r--  arm_compute/core/NEON/NEMath.inl | 529
-rw-r--r--  arm_compute/core/NEON/NESymm.h | 256
-rw-r--r--  arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 86
-rw-r--r--  arm_compute/core/NEON/kernels/NEAccumulateKernel.h | 139
-rw-r--r--  arm_compute/core/NEON/kernels/NEActivationLayerKernel.h | 120
-rw-r--r--  arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h | 108
-rw-r--r--  arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h | 109
-rw-r--r--  arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h | 91
-rw-r--r--  arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h | 152
-rw-r--r--  arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h | 101
-rw-r--r--  arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h | 72
-rw-r--r--  arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h | 70
-rw-r--r--  arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h | 72
-rw-r--r--  arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h | 72
-rw-r--r--  arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h | 95
-rw-r--r--  arm_compute/core/NEON/kernels/NEBox3x3Kernel.h | 71
-rw-r--r--  arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 189
-rw-r--r--  arm_compute/core/NEON/kernels/NEChannelCombineKernel.h | 129
-rw-r--r--  arm_compute/core/NEON/kernels/NEChannelExtractKernel.h | 113
-rw-r--r--  arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h | 80
-rw-r--r--  arm_compute/core/NEON/kernels/NECol2ImKernel.h | 115
-rw-r--r--  arm_compute/core/NEON/kernels/NEColorConvertKernel.h | 93
-rw-r--r--  arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h | 95
-rw-r--r--  arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h | 76
-rw-r--r--  arm_compute/core/NEON/kernels/NEConvolutionKernel.h | 267
-rw-r--r--  arm_compute/core/NEON/kernels/NECopyKernel.h | 78
-rw-r--r--  arm_compute/core/NEON/kernels/NECropKernel.h | 114
-rw-r--r--  arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h | 83
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h | 91
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h | 94
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h | 81
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h | 96
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h | 129
-rw-r--r--  arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h | 76
-rw-r--r--  arm_compute/core/NEON/kernels/NEDerivativeKernel.h | 98
-rw-r--r--  arm_compute/core/NEON/kernels/NEDilateKernel.h | 53
-rw-r--r--  arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 100
-rw-r--r--  arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h | 102
-rw-r--r--  arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h | 214
-rw-r--r--  arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h | 103
-rw-r--r--  arm_compute/core/NEON/kernels/NEErodeKernel.h | 53
-rw-r--r--  arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h | 93
-rw-r--r--  arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h | 103
-rw-r--r--  arm_compute/core/NEON/kernels/NEFFTScaleKernel.h | 84
-rw-r--r--  arm_compute/core/NEON/kernels/NEFastCornersKernel.h | 76
-rw-r--r--  arm_compute/core/NEON/kernels/NEFillArrayKernel.h | 77
-rw-r--r--  arm_compute/core/NEON/kernels/NEFillBorderKernel.h | 82
-rw-r--r--  arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h | 79
-rw-r--r--  arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h | 81
-rw-r--r--  arm_compute/core/NEON/kernels/NEFloorKernel.h | 60
-rw-r--r--  arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h | 116
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h | 89
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 91
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 90
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h | 103
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h | 137
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h | 112
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h | 116
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 119
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h | 119
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h | 171
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h | 75
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 96
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 94
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h | 99
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 95
-rw-r--r--  arm_compute/core/NEON/kernels/NEGatherKernel.h | 113
-rw-r--r--  arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h | 54
-rw-r--r--  arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h | 81
-rw-r--r--  arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h | 105
-rw-r--r--  arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h | 85
-rw-r--r--  arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h | 149
-rw-r--r--  arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h | 89
-rw-r--r--  arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h | 105
-rw-r--r--  arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h | 85
-rw-r--r--  arm_compute/core/NEON/kernels/NEHistogramKernel.h | 135
-rw-r--r--  arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 139
-rw-r--r--  arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h | 98
-rw-r--r--  arm_compute/core/NEON/kernels/NEIntegralImageKernel.h | 54
-rw-r--r--  arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h | 90
-rw-r--r--  arm_compute/core/NEON/kernels/NELKTrackerKernel.h | 149
-rw-r--r--  arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h | 77
-rw-r--r--  arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h | 101
-rw-r--r--  arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h | 83
-rw-r--r--  arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h | 98
-rw-r--r--  arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h | 54
-rw-r--r--  arm_compute/core/NEON/kernels/NEMemsetKernel.h | 71
-rw-r--r--  arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h | 90
-rw-r--r--  arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h | 171
-rw-r--r--  arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h | 151
-rw-r--r--  arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 108
-rw-r--r--  arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h | 110
-rw-r--r--  arm_compute/core/NEON/kernels/NEPadLayerKernel.h | 113
-rw-r--r--  arm_compute/core/NEON/kernels/NEPermuteKernel.h | 102
-rw-r--r--  arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h | 195
-rw-r--r--  arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h | 222
-rw-r--r--  arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h | 98
-rw-r--r--  arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h | 137
-rw-r--r--  arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h | 102
-rw-r--r--  arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h | 101
-rw-r--r--  arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h | 81
-rw-r--r--  arm_compute/core/NEON/kernels/NERangeKernel.h | 90
-rw-r--r--  arm_compute/core/NEON/kernels/NEReductionOperationKernel.h | 94
-rw-r--r--  arm_compute/core/NEON/kernels/NERemapKernel.h | 83
-rw-r--r--  arm_compute/core/NEON/kernels/NEReorgLayerKernel.h | 83
-rw-r--r--  arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h | 62
-rw-r--r--  arm_compute/core/NEON/kernels/NEReverseKernel.h | 80
-rw-r--r--  arm_compute/core/NEON/kernels/NEScaleKernel.h | 117
-rw-r--r--  arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h | 86
-rw-r--r--  arm_compute/core/NEON/kernels/NESelectKernel.h | 103
-rw-r--r--  arm_compute/core/NEON/kernels/NESobel3x3Kernel.h | 86
-rw-r--r--  arm_compute/core/NEON/kernels/NESobel5x5Kernel.h | 126
-rw-r--r--  arm_compute/core/NEON/kernels/NESobel7x7Kernel.h | 130
-rw-r--r--  arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h | 139
-rw-r--r--  arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h | 111
-rw-r--r--  arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h | 81
-rw-r--r--  arm_compute/core/NEON/kernels/NEStackLayerKernel.h | 93
-rw-r--r--  arm_compute/core/NEON/kernels/NEStridedSliceKernel.h | 104
-rw-r--r--  arm_compute/core/NEON/kernels/NETableLookupKernel.h | 80
-rw-r--r--  arm_compute/core/NEON/kernels/NEThresholdKernel.h | 85
-rw-r--r--  arm_compute/core/NEON/kernels/NETileKernel.h | 76
-rw-r--r--  arm_compute/core/NEON/kernels/NETransposeKernel.h | 90
-rw-r--r--  arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h | 100
-rw-r--r--  arm_compute/core/NEON/kernels/NEWarpKernel.h | 129
-rw-r--r--  arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h | 109
-rw-r--r--  arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h | 85
-rw-r--r--  arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h | 596
-rw-r--r--  arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h | 106
-rw-r--r--  arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp | 185
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/Helpers.h | 122
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h | 108
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h | 88
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h | 121
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp | 176
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp | 121
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp | 37
-rw-r--r--  arm_compute/core/NEON/kernels/assembly/gemm_common.hpp | 201
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/activation.hpp | 37
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/alloc.hpp | 31
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/arm.hpp | 39
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/convolution.hpp | 29
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/padding.hpp | 91
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/perf.h | 32
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp | 54
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp | 76
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/shims.hpp | 749
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/tensor.hpp | 178
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp | 46
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/common/utils.hpp | 60
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp | 551
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp | 156
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp | 291
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp | 88
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/winograd/winograd.hpp | 621
-rw-r--r--  arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp | 207
-rw-r--r--  arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h | 247
-rw-r--r--  arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h | 170
-rw-r--r--  arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h | 965
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/abs.h | 75
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/add.h | 201
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/and.h | 60
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/bsl.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/ceq.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/cge.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/cgt.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/clt.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/combine.h | 53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/cvt.h | 80
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/div.h | 73
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/dup_n.h | 66
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/eor.h | 56
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/exp.h | 56
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/ext.h | 62
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/gethigh.h | 53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/getlane.h | 223
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/getlow.h | 53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h | 73
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/inv.h | 62
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h | 61
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/load.h | 73
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/log.h | 56
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/max.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/min.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/mla.h | 71
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/movl.h | 49
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/movn.h | 62
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/mul.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/neg.h | 58
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/not.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/orr.h | 60
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/pmax.h | 53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/pmin.h | 53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/pow.h | 48
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/qmovun.h | 46
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h | 49
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/rev64.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/round.h | 56
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/setlane.h | 208
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/sin.h | 57
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/store.h | 70
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/sub.h | 103
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/tanh.h | 47
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/tbl.h | 45
-rw-r--r--  arm_compute/core/NEON/wrapper/scalar/add.h | 62
-rw-r--r--  arm_compute/core/NEON/wrapper/scalar/scalar.h | 30
-rw-r--r--  arm_compute/core/NEON/wrapper/scalar/sub.h | 62
-rw-r--r--  arm_compute/core/NEON/wrapper/traits.h | 140
-rw-r--r--  arm_compute/core/NEON/wrapper/wrapper.h | 34
-rw-r--r--  arm_compute/core/PixelValue.h | 81
-rw-r--r--  arm_compute/core/PyramidInfo.h | 131
-rw-r--r--  arm_compute/core/QuantizationInfo.h | 145
-rw-r--r--  arm_compute/core/Rounding.h | 4
-rw-r--r--  arm_compute/core/Size2D.h | 7
-rw-r--r--  arm_compute/core/Size3D.h | 97
-rw-r--r--  arm_compute/core/Steps.h | 7
-rw-r--r--  arm_compute/core/Strides.h | 6
-rw-r--r--  arm_compute/core/SubTensorInfo.h | 56
-rw-r--r--  arm_compute/core/TensorInfo.h | 138
-rw-r--r--  arm_compute/core/TensorShape.h | 50
-rw-r--r--  arm_compute/core/TracePoint.h | 134
-rw-r--r--  arm_compute/core/Types.h | 1299
-rw-r--r--  arm_compute/core/Utils.h | 1239
-rw-r--r--  arm_compute/core/Validate.h | 581
-rw-r--r--  arm_compute/core/Version.h | 15
-rw-r--r--  arm_compute/core/Window.h | 59
-rw-r--r--  arm_compute/core/Window.inl | 87
-rw-r--r--  arm_compute/core/WindowIterator.h | 37
-rw-r--r--  arm_compute/core/experimental/Types.h | 126
-rw-r--r--  arm_compute/core/utils/ActivationFunctionUtils.h (renamed from arm_compute/core/NEON/wrapper/intrinsics/qmov.h) | 36
-rw-r--r--  arm_compute/core/utils/DataLayoutUtils.h (renamed from arm_compute/core/NEON/INEKernel.h) | 20
-rw-r--r--  arm_compute/core/utils/DataTypeUtils.h | 549
-rw-r--r--  arm_compute/core/utils/FormatUtils.h | 344
-rw-r--r--  arm_compute/core/utils/InterpolationPolicyUtils.h (renamed from arm_compute/core/NEON/NEFixedPoint.h) | 22
-rw-r--r--  arm_compute/core/utils/StringUtils.h (renamed from arm_compute/core/GLES_COMPUTE/GCHelpers.h) | 53
-rw-r--r--  arm_compute/core/utils/helpers/AdjustVecSize.h (renamed from arm_compute/core/utils/misc/CRTP.h) | 44
-rw-r--r--  arm_compute/core/utils/helpers/bit_ops.h | 52
-rw-r--r--  arm_compute/core/utils/helpers/fft.h | 55
-rw-r--r--  arm_compute/core/utils/helpers/float_ops.h | 116
-rw-r--r--  arm_compute/core/utils/helpers/tensor_info.h | 57
-rw-r--r--  arm_compute/core/utils/helpers/tensor_transform.h | 35
-rw-r--r--  arm_compute/core/utils/io/FileHandler.h | 2
-rw-r--r--  arm_compute/core/utils/logging/FilePrinter.h | 5
-rw-r--r--  arm_compute/core/utils/logging/Helpers.h | 8
-rw-r--r--  arm_compute/core/utils/logging/IPrinter.h | 5
-rw-r--r--  arm_compute/core/utils/logging/LogMsgDecorators.h | 7
-rw-r--r--  arm_compute/core/utils/logging/Logger.h | 8
-rw-r--r--  arm_compute/core/utils/logging/LoggerRegistry.h | 15
-rw-r--r--  arm_compute/core/utils/logging/Macros.h | 45
-rw-r--r--  arm_compute/core/utils/logging/Printers.h | 2
-rw-r--r--  arm_compute/core/utils/logging/StdPrinter.h | 2
-rw-r--r--  arm_compute/core/utils/logging/Types.h | 8
-rw-r--r--  arm_compute/core/utils/math/Math.h | 71
-rw-r--r--  arm_compute/core/utils/math/SafeOps.h | 37
-rw-r--r--  arm_compute/core/utils/misc/Cast.h | 119
-rw-r--r--  arm_compute/core/utils/misc/ICloneable.h | 48
-rw-r--r--  arm_compute/core/utils/misc/InfoHelpers.h | 62
-rw-r--r--  arm_compute/core/utils/misc/Iterable.h | 108
-rw-r--r--  arm_compute/core/utils/misc/MMappedFile.h | 6
-rw-r--r--  arm_compute/core/utils/misc/Macros.h | 11
-rw-r--r--  arm_compute/core/utils/misc/Random.h | 98
-rw-r--r--  arm_compute/core/utils/misc/Requires.h | 51
-rw-r--r--  arm_compute/core/utils/misc/Rounding.h | 205
-rw-r--r--  arm_compute/core/utils/misc/SaturateCast.h | 218
-rw-r--r--  arm_compute/core/utils/misc/ShapeCalculator.h | 779
-rw-r--r--  arm_compute/core/utils/misc/Traits.h | 4
-rw-r--r--  arm_compute/core/utils/misc/Utility.h | 57
-rw-r--r--  arm_compute/core/utils/quantization/AsymmHelpers.h | 31
517 files changed, 4903 insertions(+), 51791 deletions(-)
diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h
deleted file mode 100644
index 8a182c6eb4..0000000000
--- a/arm_compute/core/AccessWindowAutoPadding.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H
-#define ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class Window;
-class ITensorInfo;
-
-/** Dummy access window.
- *
- * This implementation always uses the auto padding of the tensor info and
- * never updates the window. The valid region is always set to cover the entire
- * tensor.
- *
- * @note This access window is only used during the migration to the new
- * padding system. It will be removed once all kernels have been ported.
- *
- * */
-class AccessWindowAutoPadding : public IAccessWindow
-{
-public:
- /** Default constructor.
- *
- * @param[in,out] info Tensor info of the accessed kernel.
- */
- AccessWindowAutoPadding(ITensorInfo *info);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete;
- /** Allow instances of this class to be move constructed */
- AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default;
- /** Allow instances of this class to be moved */
- AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;
- /** Default destructor */
- ~AccessWindowAutoPadding() = default;
-
- /** Set the valid region to match the entire tensor. */
- void set_valid_region();
-
- /** Return a valid region that spans across the entire tensor.
- *
- * @return a valid region.
- *
- */
- ValidRegion compute_valid_region() const;
-
- // Inherited methods overridden:
- bool update_window_if_needed(Window &window) const override;
- bool update_padding_if_needed(const Window &window) override;
- ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
-
-private:
- ITensorInfo *_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H*/
diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h
deleted file mode 100644
index e40c188fcd..0000000000
--- a/arm_compute/core/AccessWindowStatic.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IACCESS_WINDOW_STATIC_H
-#define ARM_COMPUTE_IACCESS_WINDOW_STATIC_H
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-
-#include <array>
-
-namespace arm_compute
-{
-class Window;
-class ITensorInfo;
-
-/** Implementation of a static rectangular access pattern.
- *
- * In this implementation the access offsets and sizes are not relative to the
- * current element. Instead they are considered to be absolute coordinates
- * within the accessed tensor's shape.
- *
- * */
-class AccessWindowStatic : public IAccessWindow
-{
-public:
- /** Constructor for a static access pattern.
- *
- * @param[in,out] info Tensor info of the accessed kernel.
- * @param[in] start_x Start of the access in X direction.
- * @param[in] start_y Start of the access in Y direction.
- * @param[in] end_x End of the access in X direction.
- * @param[in] end_y End of the access in Y direction.
- */
- AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y);
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- AccessWindowStatic(const AccessWindowStatic &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- AccessWindowStatic &operator=(const AccessWindowStatic &) = delete;
- /** Allow instances of this class to be move constructed */
- AccessWindowStatic(AccessWindowStatic &&) = default;
- /** Allow instances of this class to be moved */
- AccessWindowStatic &operator=(AccessWindowStatic &&) = default;
- /** Default destructor */
- ~AccessWindowStatic() = default;
-
- /** Set the valid region based on the static access pattern and valid
- * region of the inputs.
- *
- * @param[in] window Execution window of the kernel.
- * @param[in] input_valid_region Combined valid region of all inputs.
- */
- void set_valid_region(const Window &window, const ValidRegion &input_valid_region);
-
- /** Compute the valid region based on the static access pattern and valid region of the inputs.
- *
- * @param[in] window Execution window of the kernel.
- * @param[in] input_valid_region Combined valid region of all inputs.
- *
- * @return a valid region.
- *
- */
- ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const;
-
- // Inherited methods overriden:
- bool update_window_if_needed(Window &window) const override;
- bool update_padding_if_needed(const Window &window) override;
- ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
-
-private:
- ITensorInfo *_info;
- int _start_x;
- int _start_y;
- int _end_x;
- int _end_y;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_IACCESS_WINDOW_STATIC_H*/
diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h
deleted file mode 100644
index 16105bce7c..0000000000
--- a/arm_compute/core/AccessWindowTranspose.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H
-#define ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class Window;
-class ITensorInfo;
-
-/** Implementation of a XY-transpose access pattern. */
-class AccessWindowTranspose : public AccessWindowRectangle
-{
-public:
- using AccessWindowRectangle::AccessWindowRectangle;
- bool update_window_if_needed(Window &window) const override;
- bool update_padding_if_needed(const Window &window) override;
- using AccessWindowRectangle::compute_valid_region;
- ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H*/
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 2b6d8cd2cb..dcd3b45670 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 ARM Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,8 @@ public:
*/
const StringSet &options() const;
+ bool operator==(const CLBuildOptions &other) const;
+
private:
StringSet _build_opts; /**< Build options set */
};
@@ -118,6 +120,14 @@ public:
{
return _name;
}
+ /** Returns program binary data.
+ *
+ * @return Program's binary data.
+ */
+ const std::vector<unsigned char> &binary() const
+ {
+ return _binary;
+ }
/** User-defined conversion to the underlying CL program.
*
* @return The CL program object.
@@ -240,8 +250,12 @@ public:
*
* @return The created kernel.
*/
- Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
- const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const;
+ Kernel create_kernel(const std::string &kernel_name,
+ const std::string &program_name,
+ const std::string &program_source,
+ const std::string &kernel_path,
+ const StringSet &build_options_set,
+ bool is_binary) const;
/** Clear the library's cache of binary programs
*/
@@ -288,6 +302,24 @@ public:
*/
bool int64_base_atomics_supported() const;
+ /* Returns true if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+ bool is_wbsm_supported() const;
+
+ /** Return the DDK version. If the DDK version cannot be detected, return -1.
+ *
+ * @return The DDK version.
+ */
+ int32_t get_ddk_version() const;
+
+ /** Return the Gpu target of the associated device
+ *
+ * @return GPUTarget
+ */
+ GPUTarget get_gpu_target() const;
+
private:
/** Load program and its dependencies.
*
@@ -295,7 +327,8 @@ private:
* @param[in] program_source Source of the program.
* @param[in] is_binary Flag to indicate if the program source is binary.
*/
- const Program &load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
+ const Program &
+ load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
/** Generates the build options given a string of user defined ones
*
@@ -315,10 +348,11 @@ private:
*/
std::string stringify_set(const StringSet &s, const std::string &kernel_path) const;
- cl::Context _context; /**< Underlying CL context. */
- CLDevice _device; /**< Underlying CL device. */
+ cl::Context _context; /**< Underlying CL context. */
+ CLDevice _device; /**< Underlying CL device. */
mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */
mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */
+ bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */
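
For orientation, a minimal usage sketch of the additions above (operator==, get_gpu_target(), get_ddk_version()); the function name, option string, and caching scenario are illustrative assumptions, not part of this patch:

#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/GPUTarget.h"

#include <cstdint>

using namespace arm_compute;

bool can_reuse_cached_program(const CLCompileContext &ctx, const CLBuildOptions &cached_opts)
{
    CLBuildOptions opts;
    opts.add_option("-DDATA_TYPE=float"); // add_option() is pre-existing CLBuildOptions API

    // New in this patch: build-option sets are directly comparable, e.g. to
    // decide whether a previously built program can be reused as-is.
    const bool same_options = (opts == cached_opts);

    // Also new: query the GPU target and DDK version straight from the
    // compile context (get_ddk_version() returns -1 when undetectable).
    const GPUTarget target = ctx.get_gpu_target();
    const int32_t   ddk    = ctx.get_ddk_version();

    return same_options && target != GPUTarget::UNKNOWN && ddk >= 0;
}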
diff --git a/arm_compute/core/CL/CLCoreRuntimeContext.h b/arm_compute/core/CL/CLCoreRuntimeContext.h
deleted file mode 100644
index 2b2269dece..0000000000
--- a/arm_compute/core/CL/CLCoreRuntimeContext.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCORERUNTIME_CONTEXT_H
-#define ARM_COMPUTE_CLCORERUNTIME_CONTEXT_H
-
-#include "arm_compute/core/CL/OpenCL.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class CLKernelLibrary;
-
-/** Core runtime context for OpenCL */
-class CLCoreRuntimeContext final
-{
-public:
- /** Legacy constructor */
- CLCoreRuntimeContext();
-
- /** Constructor */
- CLCoreRuntimeContext(CLKernelLibrary *kernel_lib, cl::Context ctx, cl::CommandQueue queue);
- /** Destructor */
- ~CLCoreRuntimeContext() = default;
- /** Default copy constructor */
- CLCoreRuntimeContext(const CLCoreRuntimeContext &) = default;
- /** Default move constructor */
- CLCoreRuntimeContext(CLCoreRuntimeContext &&) = default;
- /** Default copy assignment */
- CLCoreRuntimeContext &operator=(const CLCoreRuntimeContext &) = default;
- /** Default move assignment operator */
- CLCoreRuntimeContext &operator=(CLCoreRuntimeContext &&) = default;
- /** Kernel Library accessor
- *
- * @return The kernel library instance used by the core context
- */
- CLKernelLibrary *kernel_library() const;
- /** OpenCL context accessor
- *
- * @return The OpenCL context used by the core context
- */
- cl::Context context();
- /** OpenCL command queue accessor
- *
- * @return The OpenCL queue used by the core context
- */
- cl::CommandQueue queue();
-
-private:
- CLKernelLibrary *_kernel_lib{ nullptr };
- cl::Context _ctx{};
- cl::CommandQueue _queue{};
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCORERUNTIME_CONTEXT_H */
diff --git a/arm_compute/core/CL/CLDevice.h b/arm_compute/core/CL/CLDevice.h
index 812834743d..ded6bb8493 100644
--- a/arm_compute/core/CL/CLDevice.h
+++ b/arm_compute/core/CL/CLDevice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 ARM Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "arm_compute/core/IDevice.h"
#include <set>
+#include <sstream>
#include <string>
namespace arm_compute
@@ -43,8 +44,7 @@ class CLDevice : public IDevice
{
public:
/** Default Constructor */
- CLDevice()
- : _device(cl::Device()), _options()
+ CLDevice() : _device(cl::Device()), _options()
{
}
@@ -52,8 +52,7 @@ public:
*
* @param[in] cl_device OpenCL device
*/
- CLDevice(const cl::Device &cl_device)
- : _device(), _options()
+ CLDevice(const cl::Device &cl_device) : _device(), _options()
{
_device = cl_device;
@@ -65,13 +64,13 @@ public:
std::string extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>();
std::istringstream iss(extensions);
- for(std::string s; iss >> s;)
+ for (std::string s; iss >> s;)
{
_options.extensions.insert(s);
}
// SW workaround for G76
- if(_options.gpu_target == GPUTarget::G76)
+ if (_options.gpu_target == GPUTarget::G76)
{
_options.extensions.insert("cl_arm_integer_dot_product_int8");
}
@@ -142,6 +141,32 @@ public:
return _options.extensions.count(extension) != 0;
}
+ /** Returns whether non-uniform workgroup is supported and the build options.
+ *
+ * If the feature is supported, the appropriate build options will be
+ * appended to the specified string.
+ *
+ * @return A tuple (supported, build_options) indicating whether the feature
+ * is supported and the corresponding build options to enable it.
+ */
+ std::tuple<bool, std::string> is_non_uniform_workgroup_supported() const
+ {
+ if (version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device))
+ {
+ return {true, " -cl-std=CL3.0 "};
+ }
+ else if (version() == CLVersion::CL20)
+ {
+ return {true, " -cl-std=CL2.0 "};
+ }
+ else if (supported("cl_arm_non_uniform_work_group_size"))
+ {
+ return {true, " -cl-arm-non-uniform-work-group-size "};
+ }
+
+ return {false, ""};
+ }
+
private:
cl::Device _device; /**< OpenCL device. */
struct CLDeviceOptions _options; /**< OpenCL device options */
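
A hedged sketch of how the new is_non_uniform_workgroup_supported() query might feed kernel build options; obtaining the device and the base option are assumptions for illustration:

#include "arm_compute/core/CL/CLDevice.h"

#include <string>
#include <tuple>

using namespace arm_compute;

std::string assemble_build_options(const CLDevice &device)
{
    std::string build_opts = "-DVEC_SIZE=4"; // placeholder option

    bool        supported = false;
    std::string nuwg_flag;
    std::tie(supported, nuwg_flag) = device.is_non_uniform_workgroup_supported();

    if (supported)
    {
        // One of " -cl-std=CL3.0 ", " -cl-std=CL2.0 " or
        // " -cl-arm-non-uniform-work-group-size ", per the logic above.
        build_opts += nuwg_flag;
    }
    return build_opts;
}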
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index fc3f4d5db0..1a639e47f9 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,13 +26,13 @@
#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
#include <set>
#include <string>
namespace arm_compute
{
-class CLCoreRuntimeContext;
class CLCompileContext;
class CLBuildOptions;
@@ -41,6 +41,9 @@ enum class DataType;
/** Max vector width of an OpenCL vector */
static constexpr unsigned int max_cl_vector_width = 16;
+/** Max number of manual loop unrolling */
+static constexpr int max_manual_loop_unrolling = 128;
+
/** Translates a tensor data type to the appropriate OpenCL type.
*
* @param[in] dt @ref DataType to be translated to OpenCL type.
@@ -97,14 +100,6 @@ std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt);
*/
std::string get_data_size_from_data_type(const DataType &dt);
-/** Translates fixed point tensor data type to the underlying OpenCL type.
- *
- * @param[in] dt @ref DataType to be translated to OpenCL type.
- *
- * @return The string specifying the underlying OpenCL type to be used.
- */
-std::string get_underlying_cl_type_from_data_type(const DataType &dt);
-
/** Helper function to get the GPU target from CL device
*
* @param[in] device A CL device
@@ -129,6 +124,14 @@ CLVersion get_cl_version(const cl::Device &device);
*/
size_t get_cl_image_pitch_alignment(const cl::Device &device);
+/** Helper function to check whether non-uniform work group is supported
+ *
+ * @param[in] device A CL device
+ *
+ * @return True if the feature is supported
+ */
+bool get_cl_non_uniform_work_group_supported(const cl::Device &device);
+
/** Helper function to check whether a given extension is supported
*
* @param[in] device A CL device
@@ -176,7 +179,9 @@ bool dot8_acc_supported(const cl::Device &device);
*
* @return True if the configuration is supported
*/
-bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);
+bool cl_winograd_convolution_layer_supported(const Size2D &output_tile,
+ const Size2D &kernel_size,
+ DataLayout data_layout);
/** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
*
@@ -204,16 +209,6 @@ bool preferred_dummy_work_items_support(const cl::Device &device);
*/
bool image2d_from_buffer_supported(const cl::Device &device);
-/** Creates an opencl kernel
- *
- * @param[in] ctx A context to be used to create the opencl kernel.
- * @param[in] kernel_name The kernel name.
- * @param[in] build_opts The build options to be used for the opencl kernel compilation.
- *
- * @return An opencl kernel
- */
-cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts);
-
/** Creates an opencl kernel using a compile context
*
* @param[in] ctx A compile context to be used to create the opencl kernel.
@@ -222,7 +217,9 @@ cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &ke
*
* @return An opencl kernel
*/
-cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());
+cl::Kernel create_kernel(const CLCompileContext &ctx,
+ const std::string &kernel_name,
+ const std::set<std::string> &build_opts = std::set<std::string>());
/** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
* If input width is smaller than 128 we can use fewer threads than 8.
@@ -234,5 +231,62 @@ cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_
*/
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);
+/* Helper function to check if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @param[in] device cl device to check for support
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+bool get_wbsm_support_info(const cl::Device &device);
+
+/* Helper function to set the workgroup batch size modifier parameter in the kernel
+ *
+ * @param[in] kernel cl kernel to set the workgroup batch size modifier parameter
+ * @param[in] wbsm_hint workgroup batch size modifier to use
+ */
+void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);
+
+/** Helper function to check if we can export the tensor to cl_image
+ *
+ * @param[in] tensor Tensor to check
+ *
+ * @return true if we can export the tensor to cl_image
+ */
+bool export_to_cl_image(const ITensorInfo *tensor);
+
+/** Helper function to force loop unrolling with a pragma when any of the input values (iterations) is greater than @ref max_manual_loop_unrolling
+ *
+ * This function passes UNROLL_WITH_PRAGMA at compile time when any of the input values is greater than @ref max_manual_loop_unrolling.
+ *
+ * @param[in] built_opts OpenCL kernel build options
+ * @param[in] values     Input values (iterations)
+ */
+void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values);
+
+/** Helper function to check whether the cl_arm_matrix_multiply extension is supported
+ *
+ * @param[in] device A CL device
+ *
+ * @return True if the extension is supported
+ */
+bool arm_matrix_multiply_supported(const cl::Device &device);
+
+/** Check whether cl_khr_command_buffer extension is supported by the specified CL device.
+ *
+ * @param[in] device The CL device
+ *
+ * @return True if the extension is supported by the CL device.
+ */
+bool command_buffer_supported(const cl::Device &device);
+
+/** Check whether cl_khr_command_buffer_mutable_dispatch extension is supported by the specified CL device.
+ *
+ * @param[in] device The CL device
+ *
+ * @return True if the extension is supported by the CL device.
+ */
+bool command_buffer_mutable_dispatch_supported(const cl::Device &device);
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHELPERS_H */
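
Taken together, the helpers declared above compose naturally. The following is a minimal sketch based only on the declarations in this hunk: the kernel name "example_kernel" and the -DUSE_NON_UNIFORM_WG define are hypothetical placeholders, and the wbsm hint value is arbitrary.

#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/CL/CLHelpers.h"

#include <set>
#include <string>

using namespace arm_compute;

cl::Kernel build_example_kernel(const CLCompileContext &ctx, const cl::Device &device)
{
    std::set<std::string> build_opts;
    if(get_cl_non_uniform_work_group_supported(device))
    {
        build_opts.insert("-DUSE_NON_UNIFORM_WG"); // hypothetical define, for illustration only
    }

    // Only the CLCompileContext overload of create_kernel() remains after this change.
    cl::Kernel kernel = create_kernel(ctx, "example_kernel", build_opts);

    // Apply a workgroup batch size modifier hint where the device supports one.
    if(get_wbsm_support_info(device))
    {
        set_wbsm(kernel, 4); // hint value chosen arbitrarily
    }
    return kernel;
}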
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 6c5df6cb08..527733ccf1 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,7 @@ private:
public:
/** Access the KernelLibrary singleton.
- * This method has been deprecated and will be removed in the next release.
+ * This method has been deprecated and will be removed in future releases.
* @return The KernelLibrary instance.
*/
static CLKernelLibrary &get();
@@ -148,6 +148,12 @@ public:
*/
std::string get_program_name(const std::string &kernel_name) const;
+ /** Returns true if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+ bool is_wbsm_supported();
+
/** Sets the CL context used to create programs.
*
* @note Setting the context also resets the device to the
@@ -164,11 +170,7 @@ public:
CLCompileContext &get_compile_context();
private:
- CLCompileContext _compile_context; /**< Compile Context. */
- std::string _kernel_path; /**< Path to the kernels folder. */
- static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
- static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
- Used for compile-time kernel inclusion. >*/
+ CLCompileContext _compile_context; /**< Compile Context. */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLKERNELLIBRARY_H */
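
For reference, a one-line sketch of the new query in use via the (deprecated) singleton accessor; nothing beyond the declarations in this hunk is assumed.

#include "arm_compute/core/CL/CLKernelLibrary.h"

bool wbsm_available()
{
    // get() is deprecated but still the documented entry point to the library.
    return arm_compute::CLKernelLibrary::get().is_wbsm_supported();
}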
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
deleted file mode 100644
index cd26399390..0000000000
--- a/arm_compute/core/CL/CLKernels.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLKERNELS_H
-#define ARM_COMPUTE_CLKERNELS_H
-
-/* Header regrouping all the CL kernels */
-#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
-#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
-#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
-#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
-#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSelectKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
-#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
-
-#endif /* ARM_COMPUTE_CLKERNELS_H */
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
index 3643b178d3..0f088e2b10 100644
--- a/arm_compute/core/CL/CLTypes.h
+++ b/arm_compute/core/CL/CLTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,8 @@ enum class CLVersion
CL10, /* the OpenCL 1.0 */
CL11, /* the OpenCL 1.1 */
CL12, /* the OpenCL 1.2 */
- CL20, /* the OpenCL 2.0 and above */
+ CL20, /* the OpenCL 2.x */
+ CL30, /* the OpenCL 3.x */
    UNKNOWN /* unknown version */
};
@@ -62,18 +63,27 @@ struct CLDeviceOptions
struct CLQuantization
{
/** Default Constructor */
- CLQuantization()
- : scale(nullptr), offset(nullptr) {};
+ CLQuantization() : scale(nullptr), offset(nullptr){};
/** Constructor
*
* @param[in] scale OpenCL scale array
* @param[in] offset OpenCL offset array
*/
- CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset)
- : scale(scale), offset(offset) {};
+ CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) : scale(scale), offset(offset){};
const ICLFloatArray *scale; /**< Quantization scale array */
const ICLInt32Array *offset; /**< Quantization offset array */
};
+
+enum CLKernelType
+{
+ UNKNOWN, /**< Unknown CL kernel type */
+ DEPTHWISE, /**< Depthwise CL kernel type */
+ DIRECT, /**< Direct Convolution CL kernel type */
+ ELEMENTWISE, /**< Elementwise CL kernel type */
+ GEMM, /**< GEMM CL kernel type */
+ POOL, /**< Pool CL kernel type */
+ WINOGRAD /**< Winograd CL kernel type */
+};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CL_TYPES_H */
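
The new CLKernelType tag is a plain unscoped enum, so it can be switched on directly. Below is a hedged sketch of one plausible consumer, e.g. bucketing kernels for profiling; the function itself is illustrative, not part of the header.

#include "arm_compute/core/CL/CLTypes.h"

const char *kernel_type_name(arm_compute::CLKernelType type)
{
    switch(type)
    {
        case arm_compute::CLKernelType::DEPTHWISE:
            return "Depthwise";
        case arm_compute::CLKernelType::DIRECT:
            return "Direct";
        case arm_compute::CLKernelType::ELEMENTWISE:
            return "Elementwise";
        case arm_compute::CLKernelType::GEMM:
            return "GEMM";
        case arm_compute::CLKernelType::POOL:
            return "Pool";
        case arm_compute::CLKernelType::WINOGRAD:
            return "Winograd";
        default:
            return "Unknown";
    }
}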
diff --git a/arm_compute/core/CL/CLValidate.h b/arm_compute/core/CL/CLValidate.h
deleted file mode 100644
index 8f1733dcfe..0000000000
--- a/arm_compute/core/CL/CLValidate.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_VALIDATE_H
-#define ARM_COMPUTE_CL_VALIDATE_H
-
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
-
-/** Return an error if int64_base_atomics extension is not supported by the device.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- *
- * @return Status
- */
-inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line)
-{
- if(!CLKernelLibrary::get().int64_base_atomics_supported())
- {
- return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported");
- }
- return arm_compute::Status{};
-}
-
-#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
-
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_VALIDATE_H */
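
For context, this is the usage pattern the removed macros enabled inside a kernel's static validate() method. The function and its argument are hypothetical; the pattern itself presumably lives on wherever these headers were relocated internally.

arm_compute::Status validate_example(const arm_compute::ITensorInfo *input)
{
    // Bail out early when the device lacks FP16 or 64-bit atomic support.
    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
    ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
    return arm_compute::Status{};
}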
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h
index e11fb95bf8..a2b2baa5b3 100644
--- a/arm_compute/core/CL/ICLArray.h
+++ b/arm_compute/core/CL/ICLArray.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,8 +40,7 @@ public:
* @param[in] max_num_values Maximum size of the array.
*
*/
- explicit ICLArray(size_t max_num_values)
- : IArray<T>(max_num_values), _mapping(nullptr)
+ explicit ICLArray(size_t max_num_values) : IArray<T>(max_num_values), _mapping(nullptr)
{
}
@@ -66,8 +65,6 @@ public:
* @param[in] blocking If true, then the mapping will be ready to use by the time
* this method returns, else it is the caller's responsibility
* to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- *
- * @return The mapping address.
*/
void map(cl::CommandQueue &q, bool blocking = true)
{
@@ -115,14 +112,6 @@ private:
uint8_t *_mapping;
};
-/** Interface for OpenCL Array of Key Points. */
-using ICLKeyPointArray = ICLArray<KeyPoint>;
-/** Interface for OpenCL Array of 2D Coordinates. */
-using ICLCoordinates2DArray = ICLArray<Coordinates2D>;
-/** Interface for OpenCL Array of Detection Windows. */
-using ICLDetectionWindowArray = ICLArray<DetectionWindow>;
-/** Interface for OpenCL Array of 2D Sizes. */
-using ICLSize2DArray = ICLArray<Size2D>;
/** Interface for OpenCL Array of uint8s. */
using ICLUInt8Array = ICLArray<cl_uchar>;
/** Interface for OpenCL Array of uint16s. */
@@ -135,5 +124,5 @@ using ICLInt16Array = ICLArray<cl_short>;
using ICLInt32Array = ICLArray<cl_int>;
/** Interface for OpenCL Array of floats. */
using ICLFloatArray = ICLArray<cl_float>;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLARRAY_H*/
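
The blocking semantics documented for map() above translate into the following host-side pattern. This is a minimal sketch assuming the buffer() accessor inherited from IArray and the matching unmap(q) counterpart.

#include "arm_compute/core/CL/ICLArray.h"

cl_float read_first_element(arm_compute::ICLFloatArray &array, cl::CommandQueue &queue)
{
    array.map(queue, true);                   // blocking: the mapping is ready once this returns
    const cl_float value = array.buffer()[0]; // host-side view of the CL buffer
    array.unmap(queue);                       // only enqueued; flush before the device touches the buffer again
    return value;
}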
diff --git a/arm_compute/core/CL/ICLDistribution1D.h b/arm_compute/core/CL/ICLDistribution1D.h
deleted file mode 100644
index a9bafe3d5a..0000000000
--- a/arm_compute/core/CL/ICLDistribution1D.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLDISTRIBUTION1D_H
-#define ARM_COMPUTE_ICLDISTRIBUTION1D_H
-
-#include "arm_compute/core/IDistribution1D.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace cl
-{
-class Buffer;
-class CommandQueue;
-}
-
-namespace arm_compute
-{
-/** ICLDistribution1D interface class */
-class ICLDistribution1D : public IDistribution1D
-{
-public:
- /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1]
- * defined by a start offset and valid range, divided equally into num_bins parts.
- *
- * @param[in] num_bins The number of bins the distribution is divided in.
- * @param[in] offset The start of the values to use.
- * @param[in] range The total number of the consecutive values of the distribution interval.
- */
- ICLDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLDistribution1D(const ICLDistribution1D &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- const ICLDistribution1D &operator=(const ICLDistribution1D &) = delete;
- /** Enqueue a map operation of the allocated buffer on the given queue.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- * @param[in] blocking If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- */
- void map(cl::CommandQueue &q, bool blocking = true);
- /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
- *
- * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
- * the memory is accessed by the device.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- */
- void unmap(cl::CommandQueue &q);
- /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the distribution's data.
- *
- * @return A reference to an OpenCL buffer containing the distribution's data.
- */
- virtual cl::Buffer &cl_buffer() = 0;
- // Inherited methods overridden:
- uint32_t *buffer() const override;
-
-protected:
- /** Method to be implemented by the child class to map the OpenCL buffer
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- * @param[in] blocking If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- */
- virtual uint32_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
- /** Method to be implemented by the child class to unmap the OpenCL buffer
- *
- * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
- * the memory is accessed by the device.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- */
- virtual void do_unmap(cl::CommandQueue &q) = 0;
-
-protected:
- uint32_t *_mapping; /**< The distribution data. */
-};
-}
-#endif /* ARM_COMPUTE_ICLDISTRIBUTION1D_H */
diff --git a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h b/arm_compute/core/CL/ICLGEMMKernelConfiguration.h
deleted file mode 100644
index e5f4a78297..0000000000
--- a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H
-#define ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H
-
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Basic interface for the GEMM kernel configuration */
-class ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] arch GPU target
- */
- ICLGEMMKernelConfiguration(GPUTarget arch)
- : _target(arch)
- {
- }
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete;
- /** Default Move Constructor. */
- ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default;
- /** Default move assignment operator */
- ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default;
- /** Virtual destructor */
- virtual ~ICLGEMMKernelConfiguration() = default;
- /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used
- *
- * @param[in] m Number of rows LHS matrix
- * @param[in] n Number of columns RHS matrix
- * @param[in] k Number of columns LHS matrix or number of rows RHS matrix
- * @param[in] b Batch size
- * @param[in] data_type Data type
- */
- virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
-
-protected:
- GPUTarget _target;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H */
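
Before its removal, the interface above was implemented once per GPU family (see the Bifrost/Midgard/Valhall configuration headers in the file list). A hedged sketch of a minimal derived configurator, with all tuning left at defaults:

class ExampleGEMMKernelConfiguration final : public ICLGEMMKernelConfiguration
{
public:
    ExampleGEMMKernelConfiguration() : ICLGEMMKernelConfiguration(GPUTarget::MIDGARD)
    {
    }
    // Return default-initialised LHS/RHS reshape descriptors for any GEMM shape.
    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
    configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override
    {
        ARM_COMPUTE_UNUSED(m, n, k, b, data_type);
        return std::make_pair(GEMMLHSMatrixInfo{}, GEMMRHSMatrixInfo{});
    }
};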
diff --git a/arm_compute/core/CL/ICLHOG.h b/arm_compute/core/CL/ICLHOG.h
deleted file mode 100644
index b42566ef11..0000000000
--- a/arm_compute/core/CL/ICLHOG.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLHOG_H
-#define ARM_COMPUTE_ICLHOG_H
-
-#include "arm_compute/core/IHOG.h"
-
-#include <cstdint>
-
-namespace cl
-{
-class Buffer;
-class CommandQueue;
-}
-
-namespace arm_compute
-{
-/** Interface for OpenCL HOG data-object */
-class ICLHOG : public IHOG
-{
-public:
- /** Default constructor */
- ICLHOG();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLHOG(const ICLHOG &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLHOG &operator=(const ICLHOG &) = delete;
- /** Allow instances of this class to be moved */
- ICLHOG(ICLHOG &&) = default;
- /** Allow instances of this class to be moved */
- ICLHOG &operator=(ICLHOG &&) = default;
- /** Default destructor */
- virtual ~ICLHOG() = default;
-
- /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the hog's descriptor
- *
- * @return A reference to an OpenCL buffer containing the hog's descriptor
- */
- virtual const cl::Buffer &cl_buffer() const = 0;
-
- /** Enqueue a map operation of the allocated buffer on the given queue.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- * @param[in] blocking If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- *
- * @return The mapping address.
- */
- void map(cl::CommandQueue &q, bool blocking = true);
-
- /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
- *
- * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
- * the memory is accessed by the device.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- */
- void unmap(cl::CommandQueue &q);
-
- /** Interface to be implemented by the child class to free the allocated cl buffer.
- *
- * @warning The buffer must have been allocated previously. Otherwise calling the function will fail.
- */
- virtual void free() = 0;
-
- // Inherited methods overridden:
- float *descriptor() const override;
-
-protected:
- /** Method to be implemented by the child class to map the OpenCL buffer
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- * @param[in] blocking If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- */
- virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
- /** Method to be implemented by the child class to unmap the OpenCL buffer
- *
- * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
- * the memory is accessed by the device.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- */
- virtual void do_unmap(cl::CommandQueue &q) = 0;
-
-private:
- uint8_t *_mapping;
-};
-}
-#endif /*ARM_COMPUTE_ICLHOG_H */
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
deleted file mode 100644
index 3e545c61aa..0000000000
--- a/arm_compute/core/CL/ICLKernel.h
+++ /dev/null
@@ -1,387 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLKERNEL_H
-#define ARM_COMPUTE_ICLKERNEL_H
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLTypes.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/IKernel.h"
-
-#include <string>
-
-namespace arm_compute
-{
-template <typename T>
-class ICLArray;
-class ICLTensor;
-class Window;
-
-/** Common interface for all the OpenCL kernels */
-class ICLKernel : public IKernel
-{
-private:
- /** Returns the number of arguments enqueued per array object.
- *
- * @return The number of arguments enqueued per array object.
- */
- template <unsigned int dimension_size>
- constexpr static unsigned int num_arguments_per_array()
- {
- return num_arguments_per_tensor<dimension_size>();
- }
- /** Returns the number of arguments enqueued per tensor object.
- *
- * @return The number of arguments enqueued per tensor object.
- */
- template <unsigned int dimension_size>
- constexpr static unsigned int num_arguments_per_tensor()
- {
- return 2 + 2 * dimension_size;
- }
- using IKernel::configure; //Prevent children from calling IKernel::configure() directly
-protected:
- /** Configure the kernel's window and local workgroup size hint.
- *
- * @param[in] window The maximum window which will be returned by window()
- * @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
- */
- void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
- {
- _lws_hint = lws_hint;
- IKernel::configure(window);
- }
-
-public:
- /** Constructor */
- ICLKernel()
- : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
- {
- }
- /** Returns a reference to the OpenCL kernel of this object.
- *
- * @return A reference to the OpenCL kernel of this object.
- */
- cl::Kernel &kernel()
- {
- return _kernel;
- }
- /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] array Array to set as an argument of the object's kernel.
- * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
- * @param[in] num_dimensions Number of dimensions of the @p array.
- * @param[in] window Window the kernel will be executed on.
- */
- template <typename T>
- void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
- {
- add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
- }
- /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<1>(idx, tensor, window);
- }
- /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
- *
- * @param[in] cond Condition to check
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- if(cond)
- {
- add_1D_tensor_argument(idx, tensor, window);
- }
- }
- /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<2>(idx, tensor, window);
- }
- /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
- *
- * @param[in] cond Condition to check
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- if(cond)
- {
- add_2D_tensor_argument(idx, tensor, window);
- }
- }
- /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<3>(idx, tensor, window);
- }
- /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<4>(idx, tensor, window);
- }
- /** Returns the number of arguments enqueued per 1D array object.
- *
- * @return The number of arguments enqueues per 1D array object.
- */
- constexpr static unsigned int num_arguments_per_1D_array()
- {
- return num_arguments_per_array<1>();
- }
- /** Returns the number of arguments enqueued per 1D tensor object.
- *
- * @return The number of arguments enqueues per 1D tensor object.
- */
- constexpr static unsigned int num_arguments_per_1D_tensor()
- {
- return num_arguments_per_tensor<1>();
- }
- /** Returns the number of arguments enqueued per 2D tensor object.
- *
- * @return The number of arguments enqueues per 2D tensor object.
- */
- constexpr static unsigned int num_arguments_per_2D_tensor()
- {
- return num_arguments_per_tensor<2>();
- }
- /** Returns the number of arguments enqueued per 3D tensor object.
- *
- * @return The number of arguments enqueues per 3D tensor object.
- */
- constexpr static unsigned int num_arguments_per_3D_tensor()
- {
- return num_arguments_per_tensor<3>();
- }
- /** Returns the number of arguments enqueued per 4D tensor object.
- *
- * @return The number of arguments enqueues per 4D tensor object.
- */
- constexpr static unsigned int num_arguments_per_4D_tensor()
- {
- return num_arguments_per_tensor<4>();
- }
- /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
- *
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.
- */
- virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
- /** Add the passed parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] value Value to set as an argument of the object's kernel.
- */
- template <typename T>
- void add_argument(unsigned int &idx, T value)
- {
- _kernel.setArg(idx++, value);
- }
-
- /** Set the Local-Workgroup-Size hint
- *
- * @note This method should be called after the configuration of the kernel
- *
- * @param[in] lws_hint Local-Workgroup-Size to use
- */
- void set_lws_hint(const cl::NDRange &lws_hint)
- {
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
- _lws_hint = lws_hint;
- }
-
- /** Return the Local-Workgroup-Size hint
- *
- * @return Current lws hint
- */
- cl::NDRange lws_hint() const
- {
- return _lws_hint;
- }
-
- /** Get the configuration ID
- *
- * @note The configuration ID can be used by the caller to distinguish different calls of the same OpenCL kernel
- * In particular, this method can be used by CLScheduler to keep track of the best LWS for each configuration of the same kernel.
- * The configuration ID should be provided only for the kernels potentially affected by the LWS geometry
- *
- * @note This method should be called after the configuration of the kernel
- *
- * @return configuration id string
- */
- const std::string &config_id() const
- {
- return _config_id;
- }
-
- /** Set the targeted GPU architecture
- *
- * @param[in] target The targeted GPU architecture
- */
- void set_target(GPUTarget target)
- {
- _target = target;
- }
-
- /** Set the targeted GPU architecture according to the CL device
- *
- * @param[in] device A CL device
- */
- void set_target(cl::Device &device);
-
- /** Get the targeted GPU architecture
- *
- * @return The targeted GPU architecture.
- */
- GPUTarget get_target() const
- {
- return _target;
- }
-
- /** Get the maximum workgroup size for the device the CLKernelLibrary uses.
- *
- * @return The maximum workgroup size value.
- */
- size_t get_max_workgroup_size();
- /** Get the global work size given an execution window
- *
- * @param[in] window Execution window
- *
- * @return Global work size of the given execution window
- */
- static cl::NDRange gws_from_window(const Window &window);
-
-private:
- /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] array Array to set as an argument of the object's kernel.
- * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
- * @param[in] num_dimensions Number of dimensions of the @p array.
- * @param[in] window Window the kernel will be executed on.
- */
- template <typename T, unsigned int dimension_size>
- void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
- /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- template <unsigned int dimension_size>
- void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
-
-protected:
- cl::Kernel _kernel; /**< OpenCL kernel to run */
- GPUTarget _target; /**< The targeted GPU */
- std::string _config_id; /**< Configuration ID */
- size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
-private:
- cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
-};
-
-/** Add the kernel to the command queue with the given window.
- *
- * @note Depending on the size of the window, this might translate into several jobs being enqueued.
- *
- * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
- *
- * @param[in,out] queue OpenCL command queue.
- * @param[in] kernel Kernel to enqueue
- * @param[in] window Window the kernel has to process.
- * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target.
- * @param[in] use_dummy_work_items (Optional) Use dummy work items in order to have two dimensional power of two NDRange. Default is false
- * Note: it is kernel responsibility to check if the work-item is out-of-range
- *
- * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
- */
-void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false);
-
-/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] array Array to set as an argument of the object's kernel.
- * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
- * @param[in] num_dimensions Number of dimensions of the @p array.
- * @param[in] window Window the kernel will be executed on.
- */
-template <typename T, unsigned int dimension_size>
-void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
-{
- ARM_COMPUTE_ERROR_ON(array == nullptr);
-
- // Calculate offset to the start of the window
- unsigned int offset_first_element = 0;
-
- for(unsigned int n = 0; n < num_dimensions; ++n)
- {
- offset_first_element += window[n].start() * strides[n];
- }
-
- unsigned int idx_start = idx;
- _kernel.setArg(idx++, array->cl_buffer());
-
- for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
- {
- _kernel.setArg<cl_uint>(idx++, strides[dimension]);
- _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step());
- }
-
- _kernel.setArg<cl_uint>(idx++, offset_first_element);
-
- ARM_COMPUTE_ERROR_ON_MSG_VAR(idx_start + num_arguments_per_array<dimension_size>() != idx,
- "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
- ARM_COMPUTE_UNUSED(idx_start);
-}
-}
-#endif /*ARM_COMPUTE_ICLKERNEL_H */
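
Two details of the deleted header are worth keeping in mind when reading kernels that used it: every N-dimensional tensor occupies 2 + 2*N consecutive argument slots (the buffer, one {stride, step-stride} pair per dimension, then the byte offset of the first element), and that offset is the window start in each dimension times the byte stride, summed, exactly as in add_array_argument() above. A self-contained sketch of the offset arithmetic with illustrative numbers:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const std::vector<std::size_t> strides      = {4, 64}; // bytes per element step in x, y (illustrative)
    const std::vector<std::size_t> window_start = {2, 3};  // window[n].start()

    std::size_t offset_first_element = 0;
    for(std::size_t n = 0; n < strides.size(); ++n)
    {
        offset_first_element += window_start[n] * strides[n]; // as in add_array_argument()
    }
    std::cout << offset_first_element << '\n'; // 2*4 + 3*64 = 200 bytes
    return 0;
}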
diff --git a/arm_compute/core/CL/ICLLut.h b/arm_compute/core/CL/ICLLut.h
deleted file mode 100644
index 430adb8727..0000000000
--- a/arm_compute/core/CL/ICLLut.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLLUT_H
-#define ARM_COMPUTE_ICLLUT_H
-
-#include "arm_compute/core/ILut.h"
-
-#include <cstdint>
-
-namespace cl
-{
-class Buffer;
-class CommandQueue;
-}
-
-namespace arm_compute
-{
-/** Interface for OpenCL LUT */
-class ICLLut : public ILut
-{
-public:
- ICLLut();
- ICLLut(const ICLLut &) = delete;
- ICLLut &operator=(const ICLLut &) = delete;
-
- /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the lut's data.
- *
- * @return A reference to an OpenCL buffer containing the lut's data.
- */
- virtual const cl::Buffer &cl_buffer() const = 0;
- /** Enqueue a map operation of the allocated buffer on the given queue.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- * @param[in] blocking If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- */
- void map(cl::CommandQueue &q, bool blocking = true);
- /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
- *
- * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
- * the memory is accessed by the device.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- */
- void unmap(cl::CommandQueue &q);
-
- // Inherited methods overridden:
- uint8_t *buffer() const override;
-
-protected:
- /** Method to be implemented by the child class to map the OpenCL buffer
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- * @param[in] blocking If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- */
- virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
- /** Method to be implemented by the child class to unmap the OpenCL buffer
- *
- * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
- * the memory is accessed by the device.
- *
- * @param[in,out] q The CL command queue to use for the mapping operation.
- */
- virtual void do_unmap(cl::CommandQueue &q) = 0;
-
-private:
- uint8_t *_mapping;
-};
-}
-#endif /*ARM_COMPUTE_ICLLUT_H */
diff --git a/arm_compute/core/CL/ICLMultiHOG.h b/arm_compute/core/CL/ICLMultiHOG.h
deleted file mode 100644
index f9213018a2..0000000000
--- a/arm_compute/core/CL/ICLMultiHOG.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLMULTIHOG_H
-#define ARM_COMPUTE_ICLMULTIHOG_H
-
-#include "arm_compute/core/CL/ICLHOG.h"
-#include "arm_compute/core/IMultiHOG.h"
-
-namespace arm_compute
-{
-/** Interface for storing multiple HOG data-objects */
-class ICLMultiHOG : public IMultiHOG
-{
-public:
- /** Return a pointer to the requested OpenCL HOG model
- *
- * @param[in] index The index of the wanted OpenCL HOG model.
- *
- * @return A pointer pointed to the HOG model
- */
- virtual ICLHOG *cl_model(size_t index) = 0;
- /** Return a constant pointer to the requested OpenCL HOG model
- *
- * @param[in] index The index of the wanted OpenCL HOG model.
- *
- * @return A constant pointer pointed to the OpenCL HOG model
- */
- virtual const ICLHOG *cl_model(size_t index) const = 0;
-
- // Inherited methods overridden:
- IHOG *model(size_t index) override;
- const IHOG *model(size_t index) const override;
-};
-}
-#endif /*ARM_COMPUTE_ICLMULTIHOG_H */
diff --git a/arm_compute/core/CL/ICLMultiImage.h b/arm_compute/core/CL/ICLMultiImage.h
deleted file mode 100644
index 0233600e73..0000000000
--- a/arm_compute/core/CL/ICLMultiImage.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLMULTIIMAGE_H
-#define ARM_COMPUTE_ICLMULTIIMAGE_H
-
-#include "arm_compute/core/IMultiImage.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-/** Interface for OpenCL images */
-using ICLImage = ICLTensor;
-
-/** Interface for OpenCL multi-planar images */
-class ICLMultiImage : public IMultiImage
-{
-public:
- /** Return a pointer to the requested OpenCL plane of the image.
- *
- * @param[in] index The index of the wanted plane.
- *
- * @return A pointer to the requested OpenCL plane
- */
- virtual ICLImage *cl_plane(unsigned int index) = 0;
- /** Return a constant pointer to the requested OpenCL plane of the image.
- *
- * @param[in] index The index of the wanted plane.
- *
- * @return A constant pointer to the requested OpenCL plane
- */
- virtual const ICLImage *cl_plane(unsigned int index) const = 0;
-
- // Inherited methods overridden:
- IImage *plane(unsigned int index) override;
- const IImage *plane(unsigned int index) const override;
-};
-}
-#endif /*ARM_COMPUTE_ICLMULTIIMAGE_H */
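Because of the ICLImage alias above, each plane of a multi-planar image is just an ICLTensor. A hypothetical helper showing how the removed interface was meant to be used (the function name is made up; cl_plane(0) would be, e.g., the Y plane of an NV12 image):

    #include "arm_compute/core/CL/ICLMultiImage.h"
    #include "arm_compute/core/CL/ICLTensor.h"

    // Sketch: map plane 0 for host access, then release the mapping.
    void map_first_plane(arm_compute::ICLMultiImage &img, cl::CommandQueue &queue)
    {
        arm_compute::ICLImage *plane0 = img.cl_plane(0);
        plane0->map(queue, true); // blocking map: pointer usable on return
        // ... host-side reads/writes via plane0->buffer() ...
        plane0->unmap(queue);
    }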
diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/arm_compute/core/CL/ICLSimple2DKernel.h
deleted file mode 100644
index bd423303bb..0000000000
--- a/arm_compute/core/CL/ICLSimple2DKernel.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
-#define ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */
-class ICLSimple2DKernel : public ICLSimpleKernel
-{
-public:
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-}
-#endif /*ARM_COMPUTE_ICLSIMPLE2DKERNEL_H */
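The shared run() declared above lived in the corresponding source file, outside this diff. A plausible sketch of its shape, inferred from the interface: slide a 2D slice over the execution window and bind input and output for each slice.

    #include "arm_compute/core/CL/ICLSimple2DKernel.h"

    using namespace arm_compute;

    // Sketch only: per-slice argument binding and enqueue.
    void ICLSimple2DKernel::run(const Window &window, cl::CommandQueue &queue)
    {
        ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);

        Window slice = window.first_slice_window_2D();
        do
        {
            unsigned int idx = 0;
            add_2D_tensor_argument(idx, _input, slice);
            add_2D_tensor_argument(idx, _output, slice);
            enqueue(queue, *this, slice, lws_hint());
        } while(window.slide_window_slice_2D(slice));
    }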
diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/arm_compute/core/CL/ICLSimple3DKernel.h
deleted file mode 100644
index e25051f578..0000000000
--- a/arm_compute/core/CL/ICLSimple3DKernel.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
-#define ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output.
- * Both input tensor and output tensor must have at least 3 dimensions.
- */
-class ICLSimple3DKernel : public ICLSimple2DKernel
-{
-public:
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-}
-#endif /*ARM_COMPUTE_ICLSIMPLE3DKERNEL_H */
diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h
deleted file mode 100644
index e8b6f0a81c..0000000000
--- a/arm_compute/core/CL/ICLSimpleKernel.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLSIMPLEKERNEL_H
-#define ARM_COMPUTE_ICLSIMPLEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-
-namespace arm_compute
-{
-/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */
-class ICLSimpleKernel : public ICLKernel
-{
-public:
- /** Constructor. */
- ICLSimpleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLSimpleKernel(const ICLSimpleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete;
- /** Allow instances of this class to be moved */
- ICLSimpleKernel(ICLSimpleKernel &&) = default;
- /** Allow instances of this class to be moved */
- ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default;
- /** Default destructor */
- ~ICLSimpleKernel() = default;
-
- /** Configure the kernel
- *
- * @param[in] input Source tensor.
- * @param[out] output Destination tensor.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
-
-protected:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-}
-
-#endif /*ARM_COMPUTE_ICLSIMPLEKERNEL_H */
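A derived kernel would create its OpenCL kernel first and then hand window setup to the base configure(). The class name and the "copy_plane" CL kernel below are hypothetical; the 16-element step is illustrative:

    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/CL/ICLSimpleKernel.h"

    namespace arm_compute
    {
    // Hypothetical derived kernel: the base configure() wires up
    // _input/_output and the execution window.
    class CLMyCopyKernel : public ICLSimpleKernel
    {
    public:
        void configure(const ICLTensor *input, ICLTensor *output)
        {
            _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("copy_plane"));
            ICLSimpleKernel::configure(input, output, 16 /* elements per work-item */,
                                       false /* border_undefined */, BorderSize());
        }
    };
    } // namespace arm_compute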
diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h
index 001f892231..8de5423762 100644
--- a/arm_compute/core/CL/ICLTensor.h
+++ b/arm_compute/core/CL/ICLTensor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_ICLTENSOR_H
#define ARM_COMPUTE_ICLTENSOR_H
-#include "arm_compute/core/ITensor.h"
-
#include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/ITensor.h"
#include <cstdint>
@@ -34,7 +33,7 @@ namespace cl
{
class Buffer;
class CommandQueue;
-}
+} // namespace cl
namespace arm_compute
{
@@ -71,8 +70,6 @@ public:
* @param[in] blocking If true, then the mapping will be ready to use by the time
* this method returns, else it is the caller's responsibility
* to flush the queue and wait for the mapping operation to complete before accessing the mapped buffer.
- *
- * @return The mapping address.
*/
void map(cl::CommandQueue &q, bool blocking = true);
/** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
@@ -115,5 +112,5 @@ private:
};
using ICLImage = ICLTensor;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLTENSOR_H */
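A typical round trip through the retained map()/unmap() interface, using the blocking mode so the mapping is usable on return (the zero_fill helper is hypothetical):

    #include "arm_compute/core/CL/ICLTensor.h"

    #include <cstring>

    // Sketch: with blocking = false the caller must flush the queue and
    // wait before touching buffer(), per the documentation above.
    void zero_fill(arm_compute::ICLTensor &tensor, cl::CommandQueue &queue)
    {
        tensor.map(queue, true);                                      // mapping ready on return
        std::memset(tensor.buffer(), 0, tensor.info()->total_size()); // host-visible pointer
        tensor.unmap(queue);                                          // pointer invalid afterwards
    }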
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 72cbb3d2b2..8b5bf97099 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_OPENCL_H
-#define ARM_COMPUTE_OPENCL_H
+#ifndef ACL_ARM_COMPUTE_CORE_CL_OPENCL_H
+#define ACL_ARM_COMPUTE_CORE_CL_OPENCL_H
#include <string>
#include <utility>
@@ -31,8 +31,8 @@
#ifndef ARM_COMPUTE_NO_EXCEPTIONS
#define CL_HPP_ENABLE_EXCEPTIONS
#endif // ARM_COMPUTE_NO_EXCEPTIONS
-#define CL_TARGET_OPENCL_VERSION 200
-#define CL_HPP_TARGET_OPENCL_VERSION 110
+#define CL_TARGET_OPENCL_VERSION 300
+#define CL_HPP_TARGET_OPENCL_VERSION 110
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
@@ -40,8 +40,8 @@
#pragma GCC diagnostic ignored "-Wunused-parameter"
#if defined(__GNUG__) && __GNUG__ >= 8
#pragma GCC diagnostic ignored "-Wcatch-value"
-#endif // defined(__GNUG__) && __GNUG__ >= 8
-#include <CL/cl2.hpp>
+#endif // defined(__GNUG__) && __GNUG__ >= 8
+#include <CL/opencl.hpp> // include new hpp header instead of cl2.hpp
#pragma GCC diagnostic pop
namespace cl
@@ -73,25 +73,27 @@ public:
* @return The static instance of CLSymbols.
*/
static CLSymbols &get();
- /** Load symbols from the given OpenCL library path.
+ /** Load the OpenCL symbols from the first of the provided libraries that can be loaded successfully.
*
- * @param[in] library Path to the OpenCL library.
+ * @param[in] libraries_filenames Vector containing the filenames of the libraries to be loaded.
+ * @param[in] use_loader (Optional) If true, resolve the symbols through the loadOpenCLPointer loader function.
*
- * @return True if loading the library is successful.
+ * @return True if loading a library is successful. False if none of the provided libraries could be loaded.
*/
- bool load(const std::string &library);
+ bool load(const std::vector<std::string> &libraries_filenames, bool use_loader = false);
/** Load symbols from any of the default OpenCL library names.
+ * If none of the default libraries can be loaded, this method prints a warning message and returns false.
*
* @return True if loading any library is successful.
*/
bool load_default();
-#define DECLARE_FUNCTION_PTR(func_name) \
- std::function<decltype(func_name)> func_name##_ptr = nullptr
+#define DECLARE_FUNCTION_PTR(func_name) std::function<decltype(func_name)> func_name##_ptr = nullptr
DECLARE_FUNCTION_PTR(clCreateContext);
DECLARE_FUNCTION_PTR(clCreateContextFromType);
DECLARE_FUNCTION_PTR(clCreateCommandQueue);
+ DECLARE_FUNCTION_PTR(clCreateCommandQueueWithProperties);
DECLARE_FUNCTION_PTR(clGetContextInfo);
DECLARE_FUNCTION_PTR(clBuildProgram);
DECLARE_FUNCTION_PTR(clEnqueueNDRangeKernel);
@@ -123,6 +125,7 @@ public:
DECLARE_FUNCTION_PTR(clGetDeviceIDs);
DECLARE_FUNCTION_PTR(clGetMemObjectInfo);
DECLARE_FUNCTION_PTR(clRetainEvent);
+ DECLARE_FUNCTION_PTR(clGetPlatformInfo);
DECLARE_FUNCTION_PTR(clGetPlatformIDs);
DECLARE_FUNCTION_PTR(clGetKernelWorkGroupInfo);
DECLARE_FUNCTION_PTR(clGetCommandQueueInfo);
@@ -135,6 +138,18 @@ public:
DECLARE_FUNCTION_PTR(clEnqueueMarker);
DECLARE_FUNCTION_PTR(clWaitForEvents);
DECLARE_FUNCTION_PTR(clCreateImage);
+ DECLARE_FUNCTION_PTR(clSetKernelExecInfo);
+ DECLARE_FUNCTION_PTR(clGetExtensionFunctionAddressForPlatform);
+
+ // Command buffer and mutable dispatch command buffer extensions
+ DECLARE_FUNCTION_PTR(clCreateCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clRetainCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clReleaseCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clFinalizeCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clEnqueueCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clCommandNDRangeKernelKHR);
+
+ DECLARE_FUNCTION_PTR(clUpdateMutableCommandsKHR);
// Third-party extensions
DECLARE_FUNCTION_PTR(clImportMemoryARM);
@@ -145,4 +160,4 @@ private:
std::pair<bool, bool> _loaded;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_OPENCL_H */
+#endif // ACL_ARM_COMPUTE_CORE_CL_OPENCL_H
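The DECLARE_FUNCTION_PTR macro above expands to a std::function member named after the entry point, e.g. DECLARE_FUNCTION_PTR(clGetPlatformIDs) yields std::function<decltype(clGetPlatformIDs)> clGetPlatformIDs_ptr = nullptr. A minimal sketch of runtime symbol resolution through this class (the main() program is illustrative):

    #include "arm_compute/core/CL/OpenCL.h"

    #include <cstdio>

    // Sketch: resolve OpenCL at runtime instead of linking libOpenCL directly.
    int main()
    {
        if(!arm_compute::CLSymbols::get().load_default())
        {
            std::puts("No usable OpenCL library found");
            return 1;
        }
        cl_uint num_platforms = 0;
        // Each resolved entry point is callable through its <name>_ptr member:
        arm_compute::CLSymbols::get().clGetPlatformIDs_ptr(0, nullptr, &num_platforms);
        std::printf("%u OpenCL platform(s)\n", num_platforms);
        return 0;
    }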
diff --git a/arm_compute/core/CL/gemm/CLGEMMHelpers.h b/arm_compute/core/CL/gemm/CLGEMMHelpers.h
deleted file mode 100644
index dcda732c2d..0000000000
--- a/arm_compute/core/CL/gemm/CLGEMMHelpers.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMHELPERS_H
-#define ARM_COMPUTE_CLGEMMHELPERS_H
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
- *
- * @param[in] m Number of rows (M) in the LHS matrix not reshaped
- * @param[in] n Number of columns (N) in the RHS matrix not reshaped
- * @param[in] m0 Number of rows processed by each thread/work-item
- * @param[in] n0 Number of columns processed by each thread/work-item
- * @param[in] k0 Number of inner accumulations performed by each thread/work-item
- * @param[in] v0 Number of vertical blocks of size (m0xk0) stored on the same output row
- * @param[in] h0 Number of horizontal blocks of size (k0xn0) stored on the same output row
- * @param[in] lhs_interleave True if the v0 (m0xk0) blocks have to be interleaved in the output row
- * @param[in] rhs_interleave True if the h0 (k0xn0) blocks have to be interleaved in the output row
- * @param[in] lhs_transpose True if the (m0xk0) block has to be transposed before being stored
- * @param[in] rhs_transpose True if the (k0xn0) block has to be transposed before being stored
- *
- * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
- */
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
- bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose);
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMHELPERS_H */
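An illustrative call to the removed helper (the parameter values are made up): 4x4 work-item tiles with k0 = 8, v0 = 2 vertical LHS blocks and h0 = 4 horizontal RHS blocks per output row, with only the RHS blocks interleaved and transposed.

    #include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"

    // Sketch: build matching LHS/RHS reshape descriptors for a GEMM.
    std::pair<arm_compute::GEMMLHSMatrixInfo, arm_compute::GEMMRHSMatrixInfo> example_lhs_rhs_info()
    {
        return arm_compute::cl_gemm::configure_lhs_rhs_info(256 /* m */, 128 /* n */,
                                                            4 /* m0 */, 4 /* n0 */, 8 /* k0 */,
                                                            2 /* v0 */, 4 /* h0 */,
                                                            false /* lhs_interleave */, true /* rhs_interleave */,
                                                            false /* lhs_transpose */, true /* rhs_transpose */);
    }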
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
deleted file mode 100644
index a6341e5094..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h"
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** CLGEMMNative factory class */
-class CLGEMMNativeKernelConfigurationFactory final
-{
-public:
- /** Static method to construct the CLGEMMNative kernel configuration object according to the GPU target
- *
- * @param[in] gpu GPU target
- *
- * @return CLGEMMNative kernel configuration class
- */
- static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
- {
- switch(get_arch_from_target(gpu))
- {
- case GPUTarget::MIDGARD:
- return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationMidgard>(gpu);
- case GPUTarget::BIFROST:
- return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(gpu);
- case GPUTarget::VALHALL:
- return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationValhall>(gpu);
- default:
- ARM_COMPUTE_ERROR("Not supported GPU target");
- }
- }
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H */
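A minimal sketch of how this factory was consumed (the target and problem sizes are illustrative):

    #include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h"

    #include <tuple>

    // Sketch: pick the heuristic for a given GPU and query a blocking
    // configuration for an f32 GEMM.
    void query_native_config()
    {
        using namespace arm_compute;
        auto config = cl_gemm::CLGEMMNativeKernelConfigurationFactory::create(GPUTarget::G76);
        GEMMLHSMatrixInfo lhs_info;
        GEMMRHSMatrixInfo rhs_info;
        std::tie(lhs_info, rhs_info) = config->configure(1024 /* m */, 1024 /* n */, 512 /* k */,
                                                         1 /* b */, DataType::F32);
    }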
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
deleted file mode 100644
index 5b2abe6f0f..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Bifrost based OpenCL GEMMNative configuration */
-class CLGEMMNativeKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] gpu GPU target
- */
- CLGEMMNativeKernelConfigurationBifrost(GPUTarget gpu);
-
- // Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H */
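The private per-target helpers above suggest a configure() dispatch of roughly the following shape. This is a sketch only (the real body lived in the corresponding .cpp, and it assumes the base class stores the target as _target):

    #include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"

    namespace arm_compute
    {
    namespace cl_gemm
    {
    // Sketch: route to the per-GPU, per-data-type heuristic.
    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
    CLGEMMNativeKernelConfigurationBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
    {
        const bool is_f32 = (data_type == DataType::F32);
        switch(_target)
        {
            case GPUTarget::G71:
                return is_f32 ? configure_G71_f32(m, n, k, b) : configure_G71_u8(m, n, k, b);
            case GPUTarget::G76:
                return is_f32 ? configure_G76_f32(m, n, k, b) : configure_G76_u8(m, n, k, b);
            default:
                return is_f32 ? configure_default_f32(m, n, k, b) : configure_default_u8(m, n, k, b);
        }
    }
    } // namespace cl_gemm
    } // namespace arm_compute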
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
deleted file mode 100644
index 0e95a15613..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Midgard based OpenCL GEMMNative configuration */
-class CLGEMMNativeKernelConfigurationMidgard final : public ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] gpu GPU target
- */
- CLGEMMNativeKernelConfigurationMidgard(GPUTarget gpu);
-
- // Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H */
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
deleted file mode 100644
index e739997b3a..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Valhall based OpenCL GEMMNative configuration */
-class CLGEMMNativeKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] gpu GPU target
- */
- CLGEMMNativeKernelConfigurationValhall(GPUTarget gpu);
-
- // Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H */
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
deleted file mode 100644
index 10dc9aefdb..0000000000
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h"
-#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** CLGEMMReshaped factory class */
-class CLGEMMReshapedKernelConfigurationFactory final
-{
-public:
- /** Static method to construct the CLGEMMReshaped kernel configuration object according to the GPU target
- *
- * @param[in] gpu GPU target
- *
- * @return CLGEMMReshaped kernel configuration class
- */
- static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
- {
- switch(get_arch_from_target(gpu))
- {
- case GPUTarget::MIDGARD:
- case GPUTarget::BIFROST:
- return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationBifrost>(gpu);
- case GPUTarget::VALHALL:
- return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationValhall>(gpu);
- default:
- ARM_COMPUTE_ERROR("Not supported GPU target");
- }
- }
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H */
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
deleted file mode 100644
index 55742e3e56..0000000000
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Bifrost based OpenCL GEMMReshaped configuration */
-class CLGEMMReshapedKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] gpu GPU target
- */
- CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu);
-
- // Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H */
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
deleted file mode 100644
index e65974144d..0000000000
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Valhall based OpenCL GEMMReshaped configuration */
-class CLGEMMReshapedKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] gpu GPU target
- */
- CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu);
-
- // Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
deleted file mode 100644
index 7909726164..0000000000
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** CLGEMMReshapedOnlyRHS factory class */
-class CLGEMMReshapedOnlyRHSKernelConfigurationFactory final
-{
-public:
- /** Static method to construct the CLGEMMReshapedOnlyRHS kernel configuration object according to the GPU target
- *
- * @param[in] gpu GPU target
- *
- * @return CLGEMMReshapedOnlyRHS kernel configuration class
- */
- static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
- {
- switch(get_arch_from_target(gpu))
- {
- case GPUTarget::MIDGARD:
- case GPUTarget::BIFROST:
- return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationBifrost>(gpu);
- case GPUTarget::VALHALL:
- return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationValhall>(gpu);
- default:
- ARM_COMPUTE_ERROR("Not supported GPU target");
- }
- }
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
deleted file mode 100644
index 044bdc7b18..0000000000
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Bifrost based OpenCL GEMMReshapedOnlyRHS configuration */
-class CLGEMMReshapedOnlyRHSKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] gpu GPU target
- */
- CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu);
-
- // Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
deleted file mode 100644
index 6dba6fdb00..0000000000
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Valhall based OpenCL GEMMReshapedOnlyRHS configuration */
-class CLGEMMReshapedOnlyRHSKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
-{
-public:
- /** Constructor
- *
- * @param[in] gpu GPU target
- */
- CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu);
-
- // Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H */
diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
deleted file mode 100644
index 58dea3bdae..0000000000
--- a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
-#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the absolute difference kernel.
- *
- * Absolute difference is computed by:
- * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
- */
-class CLAbsoluteDifferenceKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLAbsoluteDifferenceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default;
- /** Default destructor */
- ~CLAbsoluteDifferenceKernel() = default;
-
- /** Set the inputs and output images.
- *
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1. */
- const ICLTensor *_input2; /**< Source tensor 2. */
- ICLTensor *_output; /**< Destination tensor. */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */
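A hypothetical use of the removed kernel as it stood before this change, computing out(x,y) = |in1(x,y) - in2(x,y)| (the abs_diff helper is made up; CLScheduler comes from the runtime layer):

    #include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"

    // Sketch: configure once, then enqueue over the kernel's full window.
    void abs_diff(const arm_compute::ICLTensor *in1, const arm_compute::ICLTensor *in2,
                  arm_compute::ICLTensor *out)
    {
        arm_compute::CLAbsoluteDifferenceKernel kernel;
        kernel.configure(in1, in2, out); // all three tensors: U8 or S16
        arm_compute::CLScheduler::get().enqueue(kernel);
    }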
diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h
deleted file mode 100644
index f639148e25..0000000000
--- a/arm_compute/core/CL/kernels/CLAccumulateKernel.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H
-#define ARM_COMPUTE_CLACCUMULATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the accumulate kernel.
- *
- * Accumulation is computed by:
- * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
- */
-class CLAccumulateKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation tensors.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, ICLTensor *accum);
- /** Set the input and accumulation tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
-};
-
-/** Interface for the accumulate weighted kernel.
- *
- * Weighted accumulation is computed by:
- * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
- *
- * Where @f$ 0 \le \alpha \le 1 @f$
- * Conceptually, the rounding for this is defined as:
- * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
-*/
-class CLAccumulateWeightedKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation images, and the scale value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
- /** Set the input and accumulation images, and the scale value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
-};
-
-/** Interface for the accumulate squared kernel.
- *
- * The accumulation of squares is computed by:
- * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
- *
- * Where @f$ 0 \le shift \le 15 @f$
-*/
-class CLAccumulateSquaredKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */
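A hypothetical running-average use of the weighted variant: with alpha = 0.1f the accumulator tracks 0.9 * history + 0.1 * current frame (the helper and its tensors are illustrative):

    #include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"

    // Sketch: blend each new frame into a running average.
    void accumulate_frame(const arm_compute::ICLTensor *frame, arm_compute::ICLTensor *accum)
    {
        arm_compute::CLAccumulateWeightedKernel kernel;
        kernel.configure(frame /* U8 */, 0.1f /* alpha in [0, 1] */, accum /* U8 */);
        arm_compute::CLScheduler::get().enqueue(kernel);
    }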
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
deleted file mode 100644
index 1e83a689cd..0000000000
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-/** Interface for the activation layer kernel. */
-class CLActivationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLActivationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLActivationLayerKernel(const CLActivationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLActivationLayerKernel(CLActivationLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default;
- /** Default destructor */
- ~CLActivationLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- */
- void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */
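
Pre-removal usage of this kernel is easy to reconstruct from the signatures above. A minimal sketch of an in-place bounded ReLU; shapes and values are illustrative, and the include paths are the ones shown in this diff plus the standard runtime headers:

    #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void run_inplace_bounded_relu()
    {
        CLScheduler::get().default_init();

        CLTensor src;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        src.allocator()->allocate();

        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);
        const Status st = CLActivationLayerKernel::validate(src.info(), nullptr, act_info); // check st before configuring

        CLActivationLayerKernel kernel;
        kernel.configure(&src, nullptr, act_info); // output == nullptr -> run in place
        CLScheduler::get().enqueue(kernel);
    }
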
diff --git a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
deleted file mode 100644
index 94e8baed13..0000000000
--- a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reduction operation kernel
- *
- * @note The default data type for an uninitialized output tensor is
- * signed 32-bit integer (S32). It is the user's responsibility to check
- * that the results do not overflow, because the indices are computed
- * in unsigned 32-bit (U32).
- */
-class CLArgMinMaxLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLArgMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLArgMinMaxLayerKernel(const CLArgMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLArgMinMaxLayerKernel &operator=(const CLArgMinMaxLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLArgMinMaxLayerKernel(CLArgMinMaxLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLArgMinMaxLayerKernel &operator=(CLArgMinMaxLayerKernel &&) = default;
- /** Default destructor */
- ~CLArgMinMaxLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: S32/F16/F32.
- * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
- * Has to be nullptr for the first iteration
- * @param[out] output Destination tensor. Data types supported: U32/S32
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axes: 0, 1, 2, 3
- * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
- */
- void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: S32/F16/F32.
- * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
- * Has to be nullptr for the first iteration
- * @param[out] output Destination tensor. Data types supported: U32/S32
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axes: 0, 1, 2, 3
- * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel.
- *
- * @param[in] input Source tensor info. Data types supported: S32/F16/F32.
- * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32
- * Has to be nullptr for the first iteration
- * @param[in] output Destination tensor info. Data types supported: U32/S32
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axes: 0, 1, 2, 3
- * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_prev_output;
- ICLTensor *_output;
- unsigned int _reduction_axis;
- ReductionOperation _op;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H */
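
As prev_output suggests, the owning function chains this kernel over several stages for large reductions; a single-stage call is the minimal case (first and only iteration, so prev_output is nullptr). A sketch, with includes and tensor setup as in the activation example above:

    using namespace arm_compute;

    void run_argmax_axis0(CLTensor &input, CLTensor &output)
    {
        const Status st = CLArgMinMaxLayerKernel::validate(input.info(), nullptr, output.info(),
                                                           0, ReductionOperation::ARG_IDX_MAX); // check st first

        CLArgMinMaxLayerKernel argmax;
        argmax.configure(&input, nullptr, &output, 0, ReductionOperation::ARG_IDX_MAX);
        CLScheduler::get().enqueue(argmax);
    }
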
diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
deleted file mode 100644
index 163666853c..0000000000
--- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the batch concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLBatchConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] batch_offset The offset on axis 3.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's two lowest dimensions cannot be smaller than the input's.
- * @note: The difference between the two lowest dimensions of the input and output tensors must be divisible by 2.
- *
- */
- void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] batch_offset The offset on axis 3.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's two lowest dimensions cannot be smaller than the input's.
- * @note: The difference between the two lowest dimensions of the input and output tensors must be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis 3.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _batch_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */
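
The batch_offset parameter is what lets the owning concatenation function stack several inputs into one output: each input gets its own kernel instance, configured with the running offset along axis 3. A sketch for two inputs, assuming their shapes satisfy the notes above:

    using namespace arm_compute;

    // Concatenate in0 (N0 batches) and in1 (N1 batches) into out (N0 + N1 batches).
    void concat_two_batches(CLTensor &in0, CLTensor &in1, CLTensor &out)
    {
        CLBatchConcatenateLayerKernel k0, k1;
        k0.configure(&in0, 0, &out);                        // in0 starts at batch 0
        k1.configure(&in1, in0.info()->dimension(3), &out); // in1 starts after in0's batches
        CLScheduler::get().enqueue(k0);
        CLScheduler::get().enqueue(k1);
    }
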
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
deleted file mode 100644
index 8eaaca845a..0000000000
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the BatchNormalization layer kernel.
- */
-class CLBatchNormalizationLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLBatchNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchNormalizationLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division by zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- */
- void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
- ActivationLayerInfo act_info = ActivationLayerInfo());
- /** Set the input and output tensors.
- *
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division by zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr,
- const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division by zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_mean;
- const ICLTensor *_var;
- const ICLTensor *_beta;
- const ICLTensor *_gamma;
- float _epsilon;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H */
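
None of the parameter blocks above spell out the arithmetic, so for the record: per element, the kernel applies the textbook batch-normalization formula, with beta defaulting to 0 and gamma to 1 when their tensors are omitted. A scalar reference (fused activation left out):

    #include <cmath>

    // out = gamma * (x - mean) / sqrt(var + epsilon) + beta
    float batch_norm_ref(float x, float mean, float var, float beta, float gamma, float epsilon)
    {
        return gamma * ((x - mean) / std::sqrt(var + epsilon)) + beta;
    }
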
diff --git a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
deleted file mode 100644
index 2b12ad094a..0000000000
--- a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the batch to space kernel */
-class CLBatchToSpaceLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
- /** Initialise the kernel's inputs and output (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
- /** Initialise the kernel's inputs and output (Static block shape).
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- const ICLTensor *_block_shape; /**< Block shape tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H */
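
The static-block-shape overloads avoid allocating a one-element S32 tensor when the block sizes are known at configure time. A sketch of that variant, assuming the standard batch-to-space shape relation (a [W, H, C, N] input maps to [W*bx, H*by, C, N/(bx*by)]):

    using namespace arm_compute;

    void run_batch_to_space_2x2(CLTensor &input, CLTensor &output)
    {
        const int32_t block_x = 2, block_y = 2;
        const Status st = CLBatchToSpaceLayerKernel::validate(input.info(), block_x, block_y, output.info()); // check st first

        CLBatchToSpaceLayerKernel b2s;
        b2s.configure(&input, block_x, block_y, &output);
        CLScheduler::get().enqueue(b2s);
    }
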
diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
deleted file mode 100644
index 8defe32862..0000000000
--- a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H
-#define ARM_COMPUTE_CLBITWISEANDKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise AND operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f]
- */
-class CLBitwiseAndKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLBitwiseAndKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default;
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISEANDKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
deleted file mode 100644
index b86ce7f173..0000000000
--- a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H
-#define ARM_COMPUTE_CLBITWISENOTKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise NOT operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = \lnot input(x,y) @f]
- */
-class CLBitwiseNotKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the inputs and output images.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the inputs and output images.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
deleted file mode 100644
index 65eb50f0fd..0000000000
--- a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H
-#define ARM_COMPUTE_CLBITWISEORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise OR operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f]
- */
-class CLBitwiseOrKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLBitwiseOrKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default;
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISEORKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
deleted file mode 100644
index 5c63a7f22c..0000000000
--- a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H
-#define ARM_COMPUTE_CLBITWISEXORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise XOR operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f]
- */
-class CLBitwiseXorKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLBitwiseXorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default;
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISEXORKERNEL_H */
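
The four bitwise kernels (NOT above, plus AND/OR/XOR) share one calling convention: U8 tensors in, a U8 tensor out, one bitwise operation per pixel. One sketch covers the family; tensor setup is omitted:

    using namespace arm_compute;

    void run_bitwise_ops(CLTensor &a, CLTensor &b, CLTensor &out_and, CLTensor &out_or, CLTensor &out_xor, CLTensor &out_not)
    {
        CLBitwiseAndKernel k_and;
        CLBitwiseOrKernel  k_or;
        CLBitwiseXorKernel k_xor;
        CLBitwiseNotKernel k_not;

        k_and.configure(&a, &b, &out_and); // out = a & b
        k_or.configure(&a, &b, &out_or);   // out = a | b
        k_xor.configure(&a, &b, &out_xor); // out = a ^ b
        k_not.configure(&a, &out_not);     // out = ~a

        CLScheduler::get().enqueue(k_and);
        CLScheduler::get().enqueue(k_or);
        CLScheduler::get().enqueue(k_xor);
        CLScheduler::get().enqueue(k_not);
    }
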
diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h
deleted file mode 100644
index bbe11562ed..0000000000
--- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bounding box kernel */
-class CLBoundingBoxTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBoundingBoxTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBoundingBoxTransformKernel(const CLBoundingBoxTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBoundingBoxTransformKernel &operator=(const CLBoundingBoxTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBoundingBoxTransformKernel(CLBoundingBoxTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBoundingBoxTransformKernel &operator=(CLBoundingBoxTransformKernel &&) = default;
- /** Default destructor */
- ~CLBoundingBoxTransformKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size (M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single-image prediction is supported. The image height, width and scale are contained in the BoundingBoxTransformInfo struct.
- *
- */
- void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size (M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single-image prediction is supported. The image height, width and scale are contained in the BoundingBoxTransformInfo struct.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
- *
- * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size (M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single-image prediction is supported. The image height, width and scale are contained in the BoundingBoxTransformInfo struct.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_boxes;
- ICLTensor *_pred_boxes;
- const ICLTensor *_deltas;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H */
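
A usage sketch for the transform, with M proposals and K classes. The BoundingBoxTransformInfo constructor arguments shown (image width, height, scale) are an assumption based on the note above; check the struct's definition before relying on them:

    using namespace arm_compute;

    void run_bbox_transform(CLTensor &boxes, CLTensor &deltas, CLTensor &pred_boxes)
    {
        // boxes:      Size (M, 4),   [x1, y1, x2, y2]
        // deltas:     Size (M, 4*K), [dx, dy, dw, dh]
        // pred_boxes: Size (M, 4*K), [x1, y1, x2, y2]
        const BoundingBoxTransformInfo info(800.f, 600.f, 1.f); // img_width, img_height, scale (assumed order)

        CLBoundingBoxTransformKernel bbox;
        bbox.configure(&boxes, &pred_boxes, &deltas, info);
        CLScheduler::get().enqueue(bbox);
    }
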
diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
deleted file mode 100644
index ea3c1c1f3e..0000000000
--- a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H
-#define ARM_COMPUTE_CLBOX3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the box 3x3 filter kernel.
- *
- */
-class CLBox3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */
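
Neither overload above states the filter definition, but a 3x3 box filter is, by definition, the mean of the 3x3 neighbourhood. A scalar reference, leaving border pixels to the border_undefined handling above:

    #include <cstdint>

    // out(x, y) = truncated mean of the 3x3 neighbourhood of src(x, y); valid for 1 <= x, y < dim - 1.
    uint8_t box3x3_ref(const uint8_t *src, int stride, int x, int y)
    {
        int sum = 0;
        for(int dy = -1; dy <= 1; ++dy)
        {
            for(int dx = -1; dx <= 1; ++dx)
            {
                sum += src[(y + dy) * stride + (x + dx)];
            }
        }
        return static_cast<uint8_t>(sum / 9);
    }
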
diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
deleted file mode 100644
index 40ad4dcd84..0000000000
--- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H
-#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform Gradient computation.
- */
-class CLGradientKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGradientKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGradientKernel(const CLGradientKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGradientKernel &operator=(const CLGradientKernel &) = delete;
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @note gx, gy and magnitude must all use the same data-type size (either 16-bit or 32-bit).
- *
- * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
- * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
- * @param[in] norm_type Normalization type: if 1, L1-Norm, otherwise L2-Norm.
- */
- void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @note gx, gy and magnitude must all use the same data-type size (either 16-bit or 32-bit).
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
- * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
- * @param[in] norm_type Normalization type: if 1, L1-Norm, otherwise L2-Norm.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_gx; /**< Source tensor - Gx component */
- const ICLTensor *_gy; /**< Source tensor - Gy component */
- ICLTensor *_magnitude; /**< Destination tensor - Magnitude */
- ICLTensor *_phase; /**< Destination tensor - Quantized phase */
-};
-
-/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge.
- *
- * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
- * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed.
- *
- * @note Hysteresis is computed in @ref CLEdgeTraceKernel
- */
-class CLEdgeNonMaxSuppressionKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLEdgeNonMaxSuppressionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete;
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16/U32.
- * @param[in] lower_thr Lower threshold.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16/U32.
- * @param[in] lower_thr Lower threshold.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */
- const ICLTensor *_phase; /**< Source tensor - Quantized phase. */
- ICLTensor *_output; /**< Destination tensor. */
-};
-
-/** OpenCL kernel to perform Edge tracing.
- */
-class CLEdgeTraceKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLEdgeTraceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
- * Expected to be initialized to 0 before each run.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
- * Expected to be initialized to 0 before each run.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor. */
- ICLTensor *_output; /**< Destination tensor. */
- int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */
- int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */
- ICLTensor *_visited; /**< Marks visited elements */
- ICLTensor *_recorded; /**< Marks recorded elements */
- ICLTensor *_l1_stack; /**< L1 hysteresis stack */
- ICLTensor *_l1_stack_counter; /**< L1 hysteresis stack counter */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */
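A minimal usage sketch of the edge-trace interface removed above, kept for reference. It assumes the library's CLTensor and CLScheduler runtime wrappers (not part of this patch) and tensors allocated, and zero-initialised where the documentation requires it, elsewhere; the function name and thresholds are illustrative.

    #include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void trace_edges(CLTensor &suppressed, CLTensor &edges, CLTensor &visited,
                     CLTensor &recorded, CLTensor &l1_stack, CLTensor &l1_stack_counter)
    {
        // visited/recorded (U32), l1_stack (S32) and l1_stack_counter (U8)
        // must be reset to 0 before each run, as documented above.
        CLEdgeTraceKernel trace;
        trace.configure(&suppressed, &edges, /* upper_thr */ 100, /* lower_thr */ 50,
                        &visited, &recorded, &l1_stack, &l1_stack_counter);
        CLScheduler::get().enqueue(trace); // run over the kernel's full window
    }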
diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
deleted file mode 100644
index 32ddf152c3..0000000000
--- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
-#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include <array>
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the channel combine kernel */
-class CLChannelCombineKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelCombineKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelCombineKernel(const CLChannelCombineKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelCombineKernel(CLChannelCombineKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default;
- /** Default destructor */
- ~CLChannelCombineKernel() = default;
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor.
- */
- void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor.
- */
- void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- std::array<const ICLTensor *, 4> _planes;
- ICLTensor *_output;
- ICLMultiImage *_output_multi;
- std::array<uint32_t, 3> _x_subsampling;
- std::array<uint32_t, 3> _y_subsampling;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */
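A sketch of the single-planar overload under the same assumptions (CLTensor/CLScheduler wrappers, tensors allocated elsewhere): four U8 planes combined into one RGBA8888 image.

    #include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    // r, g, b, a: U8 planes of equal size; rgba: Format::RGBA8888 destination.
    void combine_rgba(CLTensor &r, CLTensor &g, CLTensor &b, CLTensor &a, CLTensor &rgba)
    {
        CLChannelCombineKernel combine;
        combine.configure(&r, &g, &b, &a, &rgba); // single-planar output overload
        CLScheduler::get().enqueue(combine);
    }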
diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
deleted file mode 100644
index 6a0c4bb94e..0000000000
--- a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
-#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the channel extract kernel */
-class CLChannelExtractKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelExtractKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelExtractKernel(const CLChannelExtractKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelExtractKernel(CLChannelExtractKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default;
- /** Default destructor */
- ~CLChannelExtractKernel() = default;
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output Destination tensor. Must be of U8 format.
- */
- void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output Destination tensor. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar 2D destination image. Must be of U8 format.
- */
- void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar 2D destination image. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _num_elems_processed_per_iteration;
- uint32_t _subsampling;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */
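The inverse direction, sketched the same way: pulling one channel out of a packed image. Names are illustrative.

    #include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    // rgb: Format::RGB888 source; red: U8 destination of matching width/height.
    void extract_red(CLTensor &rgb, CLTensor &red)
    {
        CLChannelExtractKernel extract;
        extract.configure(&rgb, Channel::R, &red);
        CLScheduler::get().enqueue(extract);
    }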
diff --git a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
deleted file mode 100644
index 14b59d325f..0000000000
--- a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
-#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the channel shuffle kernel */
-class CLChannelShuffleLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelShuffleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelShuffleLayerKernel(const CLChannelShuffleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelShuffleLayerKernel &operator=(const CLChannelShuffleLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelShuffleLayerKernel(CLChannelShuffleLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelShuffleLayerKernel &operator=(CLChannelShuffleLayerKernel &&) = default;
- /** Default destructor */
- ~CLChannelShuffleLayerKernel() = default;
- /** Configure function's inputs and outputs.
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- */
- void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
- /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H */
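A sketch of the validate-then-configure pattern this header exposes, assuming the usual runtime wrappers; the group count is illustrative and must divide the channel count.

    #include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void shuffle_channels(CLTensor &src, CLTensor &dst)
    {
        const unsigned int num_groups = 4; // > 1, channel count must be a multiple of it
        const Status st = CLChannelShuffleLayerKernel::validate(src.info(), dst.info(), num_groups);
        if(st.error_code() == ErrorCode::OK)
        {
            CLChannelShuffleLayerKernel shuffle;
            shuffle.configure(&src, &dst, num_groups);
            CLScheduler::get().enqueue(shuffle);
        }
    }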
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
deleted file mode 100644
index d0528ed21a..0000000000
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H
-#define ARM_COMPUTE_CLCOL2IMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the col2im reshaping kernel.
- *
- * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel.
- *
- * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
- *
- * @f[
- * \left( \begin{array}{ccccccccc}
- * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccc}
- * a0 & a1 & a2 \\
- * a3 & a4 & a5 \\
- * a6 & a7 & a8 \\
- * \end{array} \right)
- * @f]
- */
-class CLCol2ImKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCol2ImKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCol2ImKernel(const CLCol2ImKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCol2ImKernel(CLCol2ImKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default;
- /** Default destructor */
- ~CLCol2ImKernel() = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
- * @param[in] convolved_dims Output convolved dimensions.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
- * @param[in] convolved_dims Output convolved dimensions.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
- *
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
- * @param[in] convolved_dims Output convolved dimensions.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
- Size2D _convolved_dims;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCOL2IMKERNEL_H */
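A sketch of the reshape documented above (GEMM output columns back to an NCHW image), under the same runtime-wrapper assumptions; names and dimensions are illustrative.

    #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    // gemm_out: matrix whose columns hold the convolution results;
    // conv_out: NCHW destination of shape [out_w, out_h, OFM, ...].
    void columns_to_image(CLTensor &gemm_out, CLTensor &conv_out,
                          unsigned int out_w, unsigned int out_h)
    {
        CLCol2ImKernel col2im;
        col2im.configure(&gemm_out, &conv_out, Size2D(out_w, out_h)); // num_groups defaults to 1
        CLScheduler::get().enqueue(col2im);
    }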
diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h
deleted file mode 100644
index 2bcd141863..0000000000
--- a/arm_compute/core/CL/kernels/CLColorConvertKernel.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
-#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the color convert kernel.
- *
- */
-class CLColorConvertKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLColorConvertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLColorConvertKernel(const CLColorConvertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLColorConvertKernel(CLColorConvertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default;
- /** Default destructor. */
- ~CLColorConvertKernel() = default;
-
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the format of @p input is RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the format of @p input is UYVY422/YUYV422/RGB888),
- * U8 (if the format of @p input is RGB888)
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the format of @p input is RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the format of @p input is UYVY422/YUYV422/RGB888),
- * U8 (if the format of @p input is RGB888)
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const ICLMultiImage *input, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
- */
- void configure(const ICLImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const ICLMultiImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Pointer to the single-planar tensor input */
- ICLTensor *_output; /**< Pointer to the single-planar tensor output */
- const ICLMultiImage *_multi_input; /**< Pointer to the multi-planar input */
- ICLMultiImage *_multi_output; /**< Pointer to the multi-planar output */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */
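One of the several conversions listed above, sketched with the runtime wrappers assumed as before: a multi-planar NV12 image converted to single-planar RGB888.

    #include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
    #include "arm_compute/runtime/CL/CLMultiImage.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    // nv12: Format::NV12 multi-planar source; rgb: Format::RGB888 destination.
    void nv12_to_rgb(CLMultiImage &nv12, CLTensor &rgb)
    {
        CLColorConvertKernel convert;
        convert.configure(&nv12, &rgb); // multi-planar to single-planar overload
        CLScheduler::get().enqueue(convert);
    }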
diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/arm_compute/core/CL/kernels/CLComparisonKernel.h
deleted file mode 100644
index d5c5297c61..0000000000
--- a/arm_compute/core/CL/kernels/CLComparisonKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H
-#define ARM_COMPUTE_CLCOMPARISONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the comparison kernel. */
-class CLComparisonKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLComparisonKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComparisonKernel(const CLComparisonKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComparisonKernel &operator=(const CLComparisonKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLComparisonKernel(CLComparisonKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLComparisonKernel &operator=(CLComparisonKernel &&) = default;
- /** Default destructor */
- ~CLComparisonKernel() = default;
- /** Set the inputs and output tensors
- *
- * @param[in] input1 Source tensor. Data types supported: All.
- * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] operation Comparison operation to use.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
- /** Set the inputs and output tensors
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: All.
- * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] operation Comparison operation to use.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
- /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel
- *
- * @param[in] input1 Source tensor. Data types supported: All.
- * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
- * @param[in] output Destination tensor. Data types supported: U8.
- * @param[in] operation Comparison operation to use.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCOMPARISONKERNEL_H */
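A sketch of an element-wise comparison with this kernel, under the same assumptions as the sketches above.

    #include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    // a, b: tensors of the same data type; mask: U8 output, non-zero where a > b.
    void greater_mask(CLTensor &a, CLTensor &b, CLTensor &mask)
    {
        CLComparisonKernel cmp;
        cmp.configure(&a, &b, &mask, ComparisonOperation::Greater);
        CLScheduler::get().enqueue(cmp);
    }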
diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
deleted file mode 100644
index d3e57a6738..0000000000
--- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa.
- *
- * @note This function can be applied to the 2D weights used by a Fully Connected layer if:
- * - It follows a Convolution layer
- * - The data layout used by the network does not match the one the model has been trained in.
- *
- * @note This function assumes the weights are already reshaped (transposed)
- */
-class CLConvertFullyConnectedWeightsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLConvertFullyConnectedWeightsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvertFullyConnectedWeightsKernel(const CLConvertFullyConnectedWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvertFullyConnectedWeightsKernel &operator=(const CLConvertFullyConnectedWeightsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLConvertFullyConnectedWeightsKernel(CLConvertFullyConnectedWeightsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLConvertFullyConnectedWeightsKernel &operator=(CLConvertFullyConnectedWeightsKernel &&) = default;
- /** Default destructor */
- ~CLConvertFullyConnectedWeightsKernel() = default;
- /** Set the input and output tensor.
- *
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
- * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
- /** Set the input and output tensor.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
- * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
- /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel
- *
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
- * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */
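A sketch of the conversion described in the notes above: 2D fully connected weights trained in NCHW, converted for a network running in NHWC. The original input shape is that of the tensor entering the fully connected layer; runtime wrappers are assumed as before.

    #include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    // weights: 2D, already reshaped (transposed); converted: same shape and type.
    void convert_fc_weights(CLTensor &weights, CLTensor &converted,
                            const TensorShape &original_input_shape)
    {
        CLConvertFullyConnectedWeightsKernel convert;
        convert.configure(&weights, &converted, original_input_shape, DataLayout::NCHW);
        CLScheduler::get().enqueue(convert);
    }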
diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
deleted file mode 100644
index b6fe51dbaa..0000000000
--- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
-#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
- * The client can supply a convolution matrix \f$ C_{m,n} \f$.
- * @f{eqnarray}{
- * k_0 &=& \frac{m}{2} \\
- * l_0 &=& \frac{n}{2} \\
- * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
- * @f}
- *
- * @note The above equation for this function is similar to the default OpenCV Filter2D function,
- * which actually computes a correlation and not a convolution.
- * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
- */
-template <unsigned int matrix_size>
-class CLConvolutionKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-
-/** Interface for the kernel which applies a 3x3 convolution to a tensor. */
-using CLConvolution3x3Kernel = CLConvolutionKernel<3>;
-/** Interface for the kernel which applies a 5x5 convolution to a tensor. */
-using CLConvolution5x5Kernel = CLConvolutionKernel<5>;
-/** Interface for the kernel which applies a 7x7 convolution to a tensor. */
-using CLConvolution7x7Kernel = CLConvolutionKernel<7>;
-/** Interface for the kernel which applies a 9x9 convolution to a tensor. */
-using CLConvolution9x9Kernel = CLConvolutionKernel<9>;
-
-/****************************************************************************************\
- * Separable Square Convolution *
-\****************************************************************************************/
-
-/** Kernel for the Horizontal pass of a Separable Convolution. Currently supports 5x5, 7x7 and 9x9 */
-template <unsigned int matrix_size>
-class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel
-{
-public:
- /** Default Constructor */
- CLSeparableConvolutionHorKernel();
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */
-using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>;
-/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */
-using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>;
-/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */
-using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>;
-
-/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */
-template <unsigned int matrix_size>
-class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: S16.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] data_type Data type to use for the intermediate result. @sa data_type_for_convolution
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: S16.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] data_type Data type to use for the intermediate result. @sa data_type_for_convolution
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-
-/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */
-using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
-/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */
-using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
-/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */
-using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-/** Kernel for running a convolution with a rectangular matrix.
- *
- * @note Supports width/height combinations of 3, 5, 7 and 9.
- */
-class CLConvolutionRectangleKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLConvolutionRectangleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size;
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */
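A sketch of the scale rule spelled out above: passing scale = 0 makes the kernel divide by the sum of the coefficients, so the 3x3 matrix below (sum 16) acts as a normalised Gaussian blur. Runtime wrappers assumed as before.

    #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    // src: U8 source; dst: U8 or S16 destination.
    void gaussian_3x3(CLTensor &src, CLTensor &dst)
    {
        static const int16_t conv[9] = { 1, 2, 1,
                                         2, 4, 2,
                                         1, 2, 1 };
        CLConvolution3x3Kernel kernel;
        kernel.configure(&src, &dst, conv, /* scale: 0 -> coefficient sum, 16 */ 0,
                         /* border_undefined */ false);
        CLScheduler::get().enqueue(kernel);
    }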
diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h
deleted file mode 100644
index 05dff8ed0c..0000000000
--- a/arm_compute/core/CL/kernels/CLCopyKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOPYKERNEL_H
-#define ARM_COMPUTE_CLCOPYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a copy between two tensors */
-class CLCopyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCopyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCopyKernel(const CLCopyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCopyKernel &operator=(const CLCopyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCopyKernel(CLCopyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCopyKernel &operator=(CLCopyKernel &&) = default;
- /** Initialize the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] padding (Optional) Padding to be applied to the input tensor
- * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
- /** Initialize the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] padding (Optional) Padding to be applied to the input tensor
- * @param[in] output_window (Optional) Window to be used when copying into only part of the output tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
- *
- * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] padding (Optional) Padding to be applied to the input tensor
- * @param[in] output_window (Optional) Window to be used when copying into only part of the output tensor. Default is nullptr.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Window _output_window;
- bool _has_output_window;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */
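A sketch of a padded copy under the same assumptions: PaddingList is a vector of {before, after} pairs, one per dimension, so this pads the first two dimensions by one element on each side while copying.

    #include "arm_compute/core/CL/kernels/CLCopyKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void padded_copy(CLTensor &src, CLTensor &dst)
    {
        CLCopyKernel copy;
        copy.configure(&src, &dst, PaddingList{ { 1, 1 }, { 1, 1 } });
        CLScheduler::get().enqueue(copy);
    }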
diff --git a/arm_compute/core/CL/kernels/CLCropKernel.h b/arm_compute/core/CL/kernels/CLCropKernel.h
deleted file mode 100644
index cbc2338940..0000000000
--- a/arm_compute/core/CL/kernels/CLCropKernel.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCROPKERNEL_H
-#define ARM_COMPUTE_CLCROPKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to crop a region of a tensor into a destination tensor */
-class CLCropKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCropKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCropKernel(const CLCropKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCropKernel &operator=(const CLCropKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCropKernel(CLCropKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCropKernel &operator=(CLCropKernel &&) = default;
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
- * @param[out] output Destination tensor. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used when the cropped image is being copied into a tensor. Default is nullptr.
- */
- void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
- * @param[out] output Destination tensor. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *output_window = nullptr);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLCropKernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC.
- * @param[in] output Destination tensor info. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *output_window = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Coordinates2D _start;
- uint32_t _batch_index;
- float _extrapolation_value;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCROPKERNEL_H */
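
For context, a minimal sketch of how CLCropKernel was driven while this header was still public. The shapes, crop coordinates and the U8 input type are illustrative assumptions, not values taken from the library; the kernel always writes F32 output, as documented above.

#include "arm_compute/core/CL/kernels/CLCropKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // create OpenCL context and queue

    // NHWC input: 3 channels, 8x8 plane, batch of 2 (illustrative)
    CLTensor   input, output;
    TensorInfo in_info(TensorShape(3U, 8U, 8U, 2U), 1, DataType::U8);
    in_info.set_data_layout(DataLayout::NHWC);
    input.allocator()->init(in_info);
    // Crop window [2,2]..[5,5] of batch 0, assumed inclusive: a 4x4 F32 patch per channel
    output.allocator()->init(TensorInfo(TensorShape(3U, 4U, 4U), 1, DataType::F32));

    CLCropKernel crop;
    crop.configure(&input, &output, Coordinates2D{ 2, 2 }, Coordinates2D{ 5, 5 }, 0 /* batch_index */);

    input.allocator()->allocate();
    output.allocator()->allocate();
    CLScheduler::get().enqueue(crop);
    CLScheduler::get().sync();
    return 0;
}
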
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
deleted file mode 100644
index 0c65f519cc..0000000000
--- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
-#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Deconvolution layer upsample kernel on OpenCL.
- */
-class CLDeconvolutionLayerUpsampleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLDeconvolutionLayerUpsampleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
- /** Default Move Constructor. */
- CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
- /** Default move assignment operator */
- CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
- /** Default destructor */
- ~CLDeconvolutionLayerUpsampleKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- PadStrideInfo _info;
- DataLayout _data_layout;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H */
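
A sketch of the validate-then-configure pattern this kernel supports. The shapes and the 2x2 stride are assumptions, and the output XY extents below are exactly what validate() is asked to confirm for that stride.

#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
    // Only X and Y may grow; the channel count must stay at 3 (16x16 is an assumed upsampled size)
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));

    const PadStrideInfo info(2 /* stride_x */, 2 /* stride_y */, 0, 0);
    ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(src.info(), dst.info(), info));

    CLDeconvolutionLayerUpsampleKernel upsample;
    upsample.configure(&src, &dst, info);
    return 0;
}
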
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
deleted file mode 100644
index 292c561e46..0000000000
--- a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
-#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution.
- *
- * The input tensor to this OpenCL kernel is expected to be the result of a @ref CLGEMM operation between the Deconvolution input and the Deconvolution filter.
- *
- * The input tensor should have the following shape: [filter_width * filter_height * ofms, width, height, batch_size]
- *
- * The output tensor should have the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
- *
- * For example, given a tensor with dimensions [4, 2, 2] (a 2x2 filter with a single output feature map applied to a 2x2 GEMM output) this function returns a tensor with dimensions [4, 4, 1].
- *
- */
-class CLDeconvolutionReshapeOutputKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLDeconvolutionReshapeOutputKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionReshapeOutputKernel(const CLDeconvolutionReshapeOutputKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionReshapeOutputKernel &operator=(const CLDeconvolutionReshapeOutputKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDeconvolutionReshapeOutputKernel(CLDeconvolutionReshapeOutputKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDeconvolutionReshapeOutputKernel &operator=(CLDeconvolutionReshapeOutputKernel &&) = default;
- /** Default destructor */
- ~CLDeconvolutionReshapeOutputKernel() = default;
-
- /** Initialise the kernel's source and destination.
- *
- * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
- * Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
- /** Initialise the kernel's source and destination.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
- * Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
- const PadStrideInfo &deconv_info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel.
- *
- * @param[in] input GEMM output tensor info to be reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] bias (Optional) Optional bias tensor info to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] output Reshaped output tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] input_info Original input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] weights_info Original weights tensor info output. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- bool _add_bias;
- const ICLTensor *_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H */
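
The output-shape relation quoted in the class comment is plain integer arithmetic, so it can be checked in isolation; a self-contained sketch reproducing the [4, 2, 2] example (the helper name is ours, not the library's):

#include <cstdio>

// Width/height of the reshaped deconvolution output, per the formula in the header above
static int deconv_out_dim(int in_dim, int filter_dim, int stride, int pad)
{
    return stride * (in_dim - 1) + filter_dim - 2 * pad;
}

int main()
{
    // Input [fw * fh * ofms, w, h] = [4, 2, 2]: a 2x2 filter, one output feature map, 2x2 GEMM output.
    // The kernel requires stride == filter size and no padding.
    const int out_w = deconv_out_dim(2, 2, 2, 0); // 4
    const int out_h = deconv_out_dim(2, 2, 2, 0); // 4
    std::printf("output shape: [%d, %d, %d]\n", out_w, out_h, 1); // [4, 4, 1]
    return 0;
}
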
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
deleted file mode 100644
index 5fe826d090..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLDepthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note The output tensor's two lowest dimensions can't be smaller than the input tensor's.
- * @note The difference between the input and output tensors in each of the two lowest dimensions must be divisible by 2.
- *
- */
- void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note The output tensor's two lowest dimensions can't be smaller than the input tensor's.
- * @note The difference between the input and output tensors in each of the two lowest dimensions must be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _depth_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */
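
Depth concatenation was driven by configuring one instance of this kernel per input, each with a growing depth_offset; a sketch with two assumed F32 inputs of depth 3 and 5:

#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor in0, in1, out;
    in0.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
    in1.allocator()->init(TensorInfo(TensorShape(8U, 8U, 5U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(8U, 8U, 8U), 1, DataType::F32)); // 3 + 5 planes

    CLDepthConcatenateLayerKernel k0, k1;
    k0.configure(&in0, 0 /* depth_offset */, &out); // writes planes [0, 3)
    k1.configure(&in1, 3 /* depth_offset */, &out); // writes planes [3, 8)

    in0.allocator()->allocate();
    in1.allocator()->allocate();
    out.allocator()->allocate();
    CLScheduler::get().enqueue(k0);
    CLScheduler::get().enqueue(k1);
    CLScheduler::get().sync();
    return 0;
}
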
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
deleted file mode 100644
index 66eb6222b2..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
-#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth conversion kernel. */
-class CLDepthConvertLayerKernel : public ICLSimple3DKernel
-{
-public:
- /** Set the input and output of the kernel.
- *
- * Valid conversions Input -> Output :
- *
- * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
- * - U8 -> S8, U16, S16, U32, S32, F16, F32
- * - U16 -> U8, S8, S16, U32, S32, F16, F32
- * - S16 -> U8, S8, U16, U32, S32, F16, F32
- * - U32 -> U8, S8, U16, S16, S32, F16, F32
- * - S32 -> U8, S8, U16, S16, U32, F16, F32
- * - F16 -> U8, S8, U16, S16, U32, F32
- * - F32 -> U8, S8, U16, S16, U32, F16
- *
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
- */
- void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
- /** Set the input and output of the kernel.
- *
- * Valid conversions Input -> Output :
- *
- * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
- * - U8 -> S8, U16, S16, U32, S32, F16, F32
- * - U16 -> U8, S8, S16, U32, S32, F16, F32
- * - S16 -> U8, S8, U16, U32, S32, F16, F32
- * - U32 -> U8, S8, U16, S16, S32, F16, F32
- * - S32 -> U8, S8, U16, S16, U32, F16, F32
- * - F16 -> U8, S8, U16, S16, U32, F32
- * - F32 -> U8, S8, U16, S16, U32, F16
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H */
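
Of the conversions listed above, U8 -> F32 is a representative upcast; a minimal sketch (the 16x16 shape is illustrative, and shift is 0 since no bit-shift makes sense for a float destination):

#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    CLDepthConvertLayerKernel convert;
    convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0 /* shift */);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    CLScheduler::get().enqueue(convert);
    CLScheduler::get().sync();
    return 0;
}
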
diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
deleted file mode 100644
index 87ac3c1ec1..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth to space kernel */
-class CLDepthToSpaceLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceLayerKernel(const CLDepthToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceLayerKernel &operator=(const CLDepthToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceLayerKernel(CLDepthToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceLayerKernel &operator=(CLDepthToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~CLDepthToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H */
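
A block_shape of 2 moves 2x2 = 4 channel planes into each spatial position, so depth shrinks by a factor of 4 while width and height double; a validate sketch with assumed NCHW shapes:

#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

int main()
{
    // NCHW: [W, H, C, N]
    const TensorInfo src(TensorShape(4U, 4U, 8U, 1U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(8U, 8U, 2U, 1U), 1, DataType::F32); // C / 2^2, XY doubled

    ARM_COMPUTE_ERROR_THROW_ON(CLDepthToSpaceLayerKernel::validate(&src, &dst, 2 /* block_shape */));
    return 0;
}
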
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
deleted file mode 100644
index 6cf0326467..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
-
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW.
- */
-class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
-{
-public:
- /** Default constructor */
- CLDepthwiseConvolutionLayer3x3NCHWKernel();
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD,
- const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
-
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- unsigned int _conv_stride_x;
- unsigned int _conv_pad_top;
- unsigned int _conv_pad_left;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H */
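
Only the validate() overload takes a GPUTarget, so a configuration can be probed for a specific GPU family without a live context; a sketch with assumed F32 NCHW shapes and SAME-style padding:

#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

int main()
{
    // NCHW: [W, H, C]; weights [3, 3, IFM]; stride 1 with pad 1 keeps 8x8
    const TensorInfo input(TensorShape(8U, 8U, 3U), 1, DataType::F32);
    const TensorInfo weights(TensorShape(3U, 3U, 3U), 1, DataType::F32);
    const TensorInfo biases(TensorShape(3U), 1, DataType::F32);
    const TensorInfo output(TensorShape(8U, 8U, 3U), 1, DataType::F32);

    ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(
        &input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1),
        1 /* depth_multiplier */, ActivationLayerInfo(), GPUTarget::BIFROST));
    return 0;
}
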
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
deleted file mode 100644
index e564cf6fe0..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
-
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC.
- */
-class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
-{
-public:
- /** Default constructor */
- CLDepthwiseConvolutionLayer3x3NHWCKernel();
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- unsigned int _num_rows_processed_per_iteration;
- unsigned int _num_planes_processed_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H */
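
Note the transposed weights layout relative to the NCHW variant above: [IFM, 3, 3] rather than [3, 3, IFM]. A validate sketch with assumed QASYMM8 tensor infos (the scales and offsets are illustrative):

#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

int main()
{
    // NHWC: [C, W, H]; weights [IFM, 3, 3]; biases are S32 for quantized inputs
    TensorInfo input(TensorShape(16U, 8U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10));
    TensorInfo weights(TensorShape(16U, 3U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 0));
    TensorInfo biases(TensorShape(16U), 1, DataType::S32);
    TensorInfo output(TensorShape(16U, 8U, 8U), 1, DataType::QASYMM8, QuantizationInfo(1.0f, 0));
    input.set_data_layout(DataLayout::NHWC);
    weights.set_data_layout(DataLayout::NHWC);
    output.set_data_layout(DataLayout::NHWC);

    ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(
        &input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1)));
    return 0;
}
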
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
deleted file mode 100644
index 8847cf9c46..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run an MxN depthwise convolution. M and N are respectively the rows and columns of the filter.
- This kernel assumes that the weights tensor is NOT reshaped (native version) */
-class CLDepthwiseConvolutionLayerNativeKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLDepthwiseConvolutionLayerNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerNativeKernel(const CLDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerNativeKernel &operator=(const CLDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthwiseConvolutionLayerNativeKernel(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Initialize the function's source, destination and parameters
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
- * @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Initialize the function's source, destination and parameters
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
- * @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC
- * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
- * @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_weights;
- const ICLTensor *_biases;
- ICLTensor *_output;
- unsigned int _depth_multiplier;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _is_quantized;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
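
Unlike the fixed 3x3 kernels, the native kernel is parameterised through the descriptor structs from KernelDescriptors.h; a configure sketch for an assumed 5x5 F32 depthwise filter (the struct field names n0 and activation_info, the n0 vector width, and the shapes are all assumptions about this release):

#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    auto make_nhwc = [](TensorShape shape, DataType dt) {
        TensorInfo info(shape, 1, dt);
        info.set_data_layout(DataLayout::NHWC);
        return info;
    };

    CLTensor input, weights, biases, output;
    input.allocator()->init(make_nhwc(TensorShape(16U, 8U, 8U), DataType::F32));   // [IFM, W, H]
    weights.allocator()->init(make_nhwc(TensorShape(16U, 5U, 5U), DataType::F32)); // [IFM, N, M]
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    output.allocator()->init(make_nhwc(TensorShape(16U, 8U, 8U), DataType::F32));

    DWCWeightsKernelInfo dwc_weights_info{};
    dwc_weights_info.n0 = 4; // output elements per thread (assumed value)
    DWCKernelInfo dwc_info{};
    dwc_info.activation_info = ActivationLayerInfo(); // no fused activation

    CLDepthwiseConvolutionLayerNativeKernel dwc;
    dwc.configure(&input, &weights, &biases, &output, dwc_weights_info, dwc_info,
                  PadStrideInfo(1, 1, 2, 2) /* pad 2 keeps 8x8 for a 5x5 filter */);
    return 0;
}
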
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
deleted file mode 100644
index 8dc5d32e4f..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to reshape the weights of depthwise convolution. */
-class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete;
- /** Default Move Constructor. */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default;
- /** Default move assignment operator */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default;
-
- /** Initialize the function's source and destination.
- *
- * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
- * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input.
- * @param[in] info Depthwise convolution information to reshape the input tensor.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
- /** Initialize the function's source and destination.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
- * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input.
- * @param[in] info Depthwise convolution information to reshape the input tensor.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel
- *
- * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
- * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input.
- * @param[in] info Depthwise convolution information to reshape the input tensor.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-
- void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info);
- void configure_generic(const DepthwiseConvolutionReshapeInfo &info);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H */
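For reference, the kernel deleted above was used in a validate-then-configure flow. The sketch below is illustrative only: CLTensor and CLScheduler are the library's runtime wrappers, the shapes are invented, and the DepthwiseConvolutionReshapeInfo field names (c0, transpose) are assumptions rather than something this diff documents.

    #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void reshape_depthwise_weights()
    {
        CLScheduler::get().default_init(); // default CL context and queue

        // Weights of dimension [IFM, W, H] in NHWC, as configure() requires
        CLTensor weights, reshaped;
        weights.allocator()->init(TensorInfo(TensorShape(32U, 3U, 3U), 1, DataType::F16));
        weights.info()->set_data_layout(DataLayout::NHWC);

        DepthwiseConvolutionReshapeInfo info{};
        info.c0        = 4;     // channels read per thread (assumed field name)
        info.transpose = false; // assumed field name

        // Output of dimension [W*H*C0, ceil(IFM/C0)] = [3*3*4, 32/4]
        reshaped.allocator()->init(TensorInfo(TensorShape(36U, 8U), 1, DataType::F16));

        ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayerReshapeWeightsKernel::validate(weights.info(), reshaped.info(), info));

        CLDepthwiseConvolutionLayerReshapeWeightsKernel kernel;
        kernel.configure(&weights, &reshaped, info);

        weights.allocator()->allocate();
        reshaped.allocator()->allocate();
        CLScheduler::get().enqueue(kernel); // launch over the window set by configure()
    }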
diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
deleted file mode 100644
index bb154f1a5b..0000000000
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the dequantization layer kernel. */
-class CLDequantizationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDequantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLDequantizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor. Data types supported: F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor. Data types supported: F16/F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[in] output Output tensor info. Data types supported: F16/F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */
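The dequantization kernel follows the same validate/configure/enqueue pattern; a minimal sketch, assuming an invented QASYMM8 quantization (scale 0.5, offset 10) and shapes chosen purely for illustration:

    #include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void dequantize_qasymm8()
    {
        CLScheduler::get().default_init();

        CLTensor input, output;
        input.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
        output.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32)); // f32 = scale * (q - offset)

        ARM_COMPUTE_ERROR_THROW_ON(CLDequantizationLayerKernel::validate(input.info(), output.info()));

        CLDequantizationLayerKernel kernel;
        kernel.configure(&input, &output);

        input.allocator()->allocate();
        output.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }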
diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
deleted file mode 100644
index cd8ae90c2d..0000000000
--- a/arm_compute/core/CL/kernels/CLDerivativeKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H
-#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the derivative kernel. */
-class CLDerivativeKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDerivativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDerivativeKernel(const CLDerivativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDerivativeKernel(CLDerivativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default;
- /** Default destructor */
- ~CLDerivativeKernel() = default;
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< Output tensor - Derivative along the X direction */
- ICLTensor *_output_y; /**< Output tensor - Derivative along the Y direction */
- bool _run_derivative_x; /**< Whether to compute the derivative along X */
- bool _run_derivative_y; /**< Whether to compute the derivative along Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */
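Because at least one of output_x/output_y must be set, a caller can pass nullptr for the gradient it does not need. A hedged sketch computing only the X derivative (shapes invented):

    #include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void derivative_x_only()
    {
        CLScheduler::get().default_init();

        CLTensor src, grad_x;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        grad_x.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::S16));

        CLDerivativeKernel kernel;
        kernel.configure(&src, &grad_x, nullptr, true); // border_undefined = true, Y gradient skipped

        src.allocator()->allocate();
        grad_x.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }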
diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h
deleted file mode 100644
index 45f5fe0764..0000000000
--- a/arm_compute/core/CL/kernels/CLDilateKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDILATEKERNEL_H
-#define ARM_COMPUTE_CLDILATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the dilate kernel.
- *
- */
-class CLDilateKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */
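CLDilateKernel inherits its run() method from ICLSimple2DKernel, so a caller only configures and enqueues. A minimal sketch (shape invented; with border_undefined = true the border pixels of the output are left unspecified):

    #include "arm_compute/core/CL/kernels/CLDilateKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void dilate_u8()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));

        CLDilateKernel kernel;
        kernel.configure(&src, &dst, true); // border_undefined = true

        src.allocator()->allocate();
        dst.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }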
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
deleted file mode 100644
index 489d7c27c5..0000000000
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the direct convolution kernel.
- */
-class CLDirectConvolutionLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDirectConvolutionLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDirectConvolutionLayerKernel(const CLDirectConvolutionLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDirectConvolutionLayerKernel &operator=(const CLDirectConvolutionLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDirectConvolutionLayerKernel(CLDirectConvolutionLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDirectConvolutionLayerKernel &operator=(CLDirectConvolutionLayerKernel &&) = default;
- /** Default destructor */
- ~CLDirectConvolutionLayerKernel() = default;
- /** Set the input, weights, biases and output tensors.
- *
- * @note DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- * 3x3 convolution with stride_x = 1/2, stride_y = 1/2
- * 5x5 convolution with stride_x = 1/2, stride_y = 1/2
- * 9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor.
- * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
- /** Set the input, weights, biases and output tensors.
- *
- * @note DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- * 3x3 convolution with stride_x = 1/2, stride_y = 1/2
- * 5x5 convolution with stride_x = 1/2, stride_y = 1/2
- * 9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor.
- * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Output tensor.
- * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] target Target GPU architecture.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-public:
- const ICLTensor *_input;
- const ICLTensor *_biases;
- const ICLTensor *_weights;
- ICLTensor *_output;
- DataLayout _data_layout;
- BorderSize _border_size;
- int _conv_stride_x;
- int _conv_stride_y;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H */
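A sketch of one supported configuration from the notes above, a 1x1 F32 convolution with stride 1; the shapes are invented and the GPU target passed to validate() is simply the scheduler's current one:

    #include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void direct_conv_1x1()
    {
        CLScheduler::get().default_init();

        CLTensor input, weights, biases, output;
        input.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));        // [W, H, IFM]
        weights.allocator()->init(TensorInfo(TensorShape(1U, 1U, 16U, 32U), 1, DataType::F32)); // [kx, ky, IFM, OFM]
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));               // [OFM]
        output.allocator()->init(TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32));

        const PadStrideInfo conv_info(1, 1, 0, 0); // stride 1, no padding

        ARM_COMPUTE_ERROR_THROW_ON(CLDirectConvolutionLayerKernel::validate(input.info(), weights.info(), biases.info(), output.info(), conv_info, CLScheduler::get().target()));

        CLDirectConvolutionLayerKernel kernel;
        kernel.configure(&input, &weights, &biases, &output, conv_info);

        input.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        output.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }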
diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
deleted file mode 100644
index e190bdebbe..0000000000
--- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
-#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the elementwise unary operator */
-class CLElementWiseUnaryLayerKernel : public ICLSimpleKernel
-{
-public:
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * @param[in] op Element wise unary operation to perform.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op);
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * @param[in] op Element wise unary operation to perform.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op);
- /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] op Element wise unary operation to perform.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */
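A sketch applying one unary operation; ElementWiseUnary::RSQRT is used here as one example value of the enum the kernel accepts, with invented shapes:

    #include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void rsqrt_f32()
    {
        CLScheduler::get().default_init();

        CLTensor input, output;
        input.allocator()->init(TensorInfo(TensorShape(256U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(256U), 1, DataType::F32));

        ARM_COMPUTE_ERROR_THROW_ON(CLElementWiseUnaryLayerKernel::validate(input.info(), output.info(), ElementWiseUnary::RSQRT));

        CLElementWiseUnaryLayerKernel kernel;
        kernel.configure(&input, &output, ElementWiseUnary::RSQRT); // out = 1 / sqrt(in)

        input.allocator()->allocate();
        output.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }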
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
deleted file mode 100644
index 4d3d4bc834..0000000000
--- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
-#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for an element-wise operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x,y) = OP(input1(x,y), input2(x,y)) @f]
- *
- */
-class CLElementwiseOperationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLElementwiseOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLElementwiseOperationKernel(const CLElementwiseOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLElementwiseOperationKernel &operator=(const CLElementwiseOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLElementwiseOperationKernel(CLElementwiseOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLElementwiseOperationKernel &operator=(CLElementwiseOperationKernel &&) = default;
- /** Default destructor */
- ~CLElementwiseOperationKernel() = default;
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-protected:
- /** The name of the operation */
- virtual std::string name() = 0;
-
- /** Validate the inputs and configure the kernel's execution window.
- *
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Same as @p input1.
- *
- * @return a pair of Status and Window
- */
- virtual std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) = 0;
-
- /** Validate the arguments passed to the kernel
- *
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Same as @p input1.
- *
- * @return a Status
- */
- virtual Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0;
-
- /** Generate the build options for the specific kernel
- *
- * @return a CLBuildOptions struct
- */
- virtual CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0;
-
- /** Generate the identifier for tuning
- *
- * @return a string
- */
- virtual std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) = 0;
-
- /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
- *
- */
- void configure_common(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
- *
- */
- void configure_common(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- ActivationLayerInfo _act_info;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-/** Kernel for element-wise arithmetic operations that handle overflow according to a ConvertPolicy */
-class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel
-{
-public:
- CLSaturatedArithmeticOperationKernel()
- : CLElementwiseOperationKernel(), _policy(), _op()
- {
- }
-
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Same as @p input1.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Same as @p input1.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a Status
- */
- static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
-protected:
- // Inherited methods overridden:
- std::string name() override;
- std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override;
- Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
- CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
- std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override;
-
-private:
- ConvertPolicy _policy;
- ArithmeticOperation _op;
-};
-
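-/** Kernel for element-wise arithmetic operations that take no overflow policy (e.g. Div, Min, Max, SquaredDiff) */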
-class CLArithmeticOperationKernel : public CLElementwiseOperationKernel
-{
-public:
- CLArithmeticOperationKernel()
- : CLElementwiseOperationKernel(), _op()
- {
- }
-
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a Status
- */
- static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
-protected:
- // Inherited methods overridden:
- std::string name() override;
- std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override;
- Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
- CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
- std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override;
-
-private:
- ArithmeticOperation _op;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H */
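A sketch of a saturated element-wise addition through the concrete subclass above; the S16 data type and SATURATE policy are illustrative choices:

    #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void saturated_add_s16()
    {
        CLScheduler::get().default_init();

        CLTensor a, b, out;
        a.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::S16));
        b.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::S16));
        out.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::S16));

        ARM_COMPUTE_ERROR_THROW_ON(CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, a.info(), b.info(), out.info(), ConvertPolicy::SATURATE));

        CLSaturatedArithmeticOperationKernel kernel;
        kernel.configure(ArithmeticOperation::ADD, &a, &b, &out, ConvertPolicy::SATURATE); // clamps instead of wrapping

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }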
diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h
deleted file mode 100644
index cbc748194c..0000000000
--- a/arm_compute/core/CL/kernels/CLErodeKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLERODEKERNEL_H
-#define ARM_COMPUTE_CLERODEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the erode kernel.
- *
- */
-class CLErodeKernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /**Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLERODEKERNEL_H */
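Erosion is configured exactly like the dilate sketch earlier; reusing that sketch's src and dst tensors, only the class name changes:

    CLErodeKernel erode;
    erode.configure(&src, &dst, true); // border_undefined = true
    CLScheduler::get().enqueue(erode);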
diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
deleted file mode 100644
index a8da1246bb..0000000000
--- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
-#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the digit reverse operation kernel. */
-class CLFFTDigitReverseKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLFFTDigitReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTDigitReverseKernel(const CLFFTDigitReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTDigitReverseKernel &operator=(const CLFFTDigitReverseKernel &) = delete;
- /** Default Move Constructor. */
- CLFFTDigitReverseKernel(CLFFTDigitReverseKernel &&) = default;
- /** Default move assignment operator */
- CLFFTDigitReverseKernel &operator=(CLFFTDigitReverseKernel &&) = default;
- /** Default destructor */
- ~CLFFTDigitReverseKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] config Kernel configuration.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] config Kernel configuration.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] idx Digit reverse index tensor info. Data type supported: U32
- * @param[in] config Kernel configuration.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_idx;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H */
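FFT tensors in these kernels hold interleaved complex values as two-channel F32; the FFTDigitReverseKernelInfo field names (axis, conjugate) below are assumptions for illustration, as are the shapes:

    #include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void fft_digit_reverse()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst, idx;
        src.allocator()->init(TensorInfo(TensorShape(128U), 2, DataType::F32)); // interleaved complex
        dst.allocator()->init(TensorInfo(TensorShape(128U), 2, DataType::F32));
        idx.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::U32)); // precomputed reverse indices

        FFTDigitReverseKernelInfo config{};
        config.axis      = 0;     // assumed field name
        config.conjugate = false; // assumed field name

        ARM_COMPUTE_ERROR_THROW_ON(CLFFTDigitReverseKernel::validate(src.info(), dst.info(), idx.info(), config));

        CLFFTDigitReverseKernel kernel;
        kernel.configure(&src, &dst, &idx, config);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        idx.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }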
diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
deleted file mode 100644
index e3f53462d9..0000000000
--- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
-#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-#include <set>
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the FFT radix stage kernel. */
-class CLFFTRadixStageKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLFFTRadixStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTRadixStageKernel(const CLFFTRadixStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTRadixStageKernel &operator=(const CLFFTRadixStageKernel &) = delete;
- /** Default Move Constructor. */
- CLFFTRadixStageKernel(CLFFTRadixStageKernel &&) = default;
- /** Default move assignment operator */
- CLFFTRadixStageKernel &operator=(CLFFTRadixStageKernel &&) = default;
- /** Default destructor */
- ~CLFFTRadixStageKernel() = default;
- /** Set the input and output tensors.
- *
- * @note If the output tensor is nullptr, the FFT will be performed in-place
- *
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
- * @param[in] config FFT descriptor metadata.
- */
- void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
- /** Set the input and output tensors.
- *
- * @note If the output tensor is nullptr, the FFT will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
- * @param[in] config FFT descriptor metadata.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32.
- * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input
- * @param[in] config FFT descriptor metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
- /** Returns the radix values supported by the FFT kernel
- *
- * @return A set of supported radix values
- */
- static std::set<unsigned int> supported_radix();
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H */
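supported_radix() lets a caller check a factorization before configuring, and a nullptr output runs the stage in place, as the notes above document. The FFTRadixStageKernelInfo field names (axis, radix, Nx, is_first_stage) are assumptions:

    #include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void fft_radix_stage_in_place()
    {
        CLScheduler::get().default_init();

        CLTensor data;
        data.allocator()->init(TensorInfo(TensorShape(64U), 2, DataType::F32)); // interleaved complex

        ARM_COMPUTE_ERROR_ON(CLFFTRadixStageKernel::supported_radix().count(4) == 0); // require radix-4

        FFTRadixStageKernelInfo config{};
        config.axis           = 0;    // assumed field names from here down
        config.radix          = 4;
        config.Nx             = 1;
        config.is_first_stage = true;

        CLFFTRadixStageKernel kernel;
        kernel.configure(&data, nullptr, config); // nullptr output -> in-place stage

        data.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }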
diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
deleted file mode 100644
index d0d2b7613c..0000000000
--- a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H
-#define ARM_COMPUTE_CLFFTSCALEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the inverse FFT scale kernel. */
-class CLFFTScaleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLFFTScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTScaleKernel(const CLFFTScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete;
- /** Default Move Constructor. */
- CLFFTScaleKernel(CLFFTScaleKernel &&) = default;
- /** Default move assignment operator */
- CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default;
- /** Default destructor */
- ~CLFFTScaleKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] config Kernel configuration
- */
- void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] config Kernel configuration
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] config Kernel configuration
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFFTSCALEKERNEL_H */
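The scale kernel typically applies the 1/N normalization of an inverse transform; the FFTScaleKernelInfo field names (scale, conjugate) are assumptions, as are the shapes:

    #include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void fft_scale()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U), 2, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U), 2, DataType::F32));

        FFTScaleKernelInfo config{};
        config.scale     = 1.f / 64.f; // 1/N for an inverse FFT (assumed field name)
        config.conjugate = false;      // assumed field name

        ARM_COMPUTE_ERROR_THROW_ON(CLFFTScaleKernel::validate(src.info(), dst.info(), config));

        CLFFTScaleKernel kernel;
        kernel.configure(&src, &dst, config);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        CLScheduler::get().enqueue(kernel);
    }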
diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h
deleted file mode 100644
index 1a0d4e36a5..0000000000
--- a/arm_compute/core/CL/kernels/CLFastCornersKernel.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H
-#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** CL kernel to perform fast corners */
-class CLFastCornersKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFastCornersKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFastCornersKernel(const CLFastCornersKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFastCornersKernel(CLFastCornersKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default;
- /** Default destructor */
- ~CLFastCornersKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Output image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise.
- * @param[in] border_mode Strategy to use for borders.
- */
- void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
- /** Initialise the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Output image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise.
- * @param[in] border_mode Strategy to use for borders.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLImage *_input;
- ICLImage *_output;
-};
-
-/** CL kernel to copy keypoint information to an ICLKeyPointArray and count the number of key points */
-class CLCopyToArrayKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCopyToArrayKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default;
- /** Default destructor */
- ~CLCopyToArrayKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] update_number Flag to indicate whether we need to update the number of corners
- * @param[out] corners Array of keypoints to store the results.
- * @param[out] num_buffers CL buffer that receives the number of key points written to @p corners.
- */
- void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
- /** Initialise the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] update_number Flag to indicate whether we need to update the number of corners
- * @param[out] corners Array of keypoints to store the results.
- * @param[out] num_buffers CL buffer that receives the number of key points written to @p corners.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input; /**< source image */
- ICLKeyPointArray *_corners; /**< destination array */
- cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
deleted file mode 100644
index d00ea55a83..0000000000
--- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H
-#define ARM_COMPUTE_CLFILLBORDERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the OpenCL kernel that fills the border of a tensor */
-class CLFillBorderKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFillBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFillBorderKernel(const CLFillBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFillBorderKernel(CLFillBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default;
- /** Default destructor */
- ~CLFillBorderKernel() = default;
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in,out] tensor Tensor to process. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Tensor to process. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
-
- /** Function to set the constant value on fill border kernel depending on type.
- *
- * @param[in] idx Index of the kernel argument to set.
- * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
- */
- template <class T>
- void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- bool is_parallelisable() const override;
-
-private:
- ICLTensor *_tensor;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFILLBORDERKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h b/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h
deleted file mode 100644
index ab009e1aa8..0000000000
--- a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
-#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL interface for the flatten kernel.*/
-class CLFlattenLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFlattenLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input Input tensor to flatten, with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches], where
- * w, h and d are the width, height and depth of the input tensor. Data type supported: same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor to flatten, with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches], where
- * w, h and d are the width, height and depth of the input tensor. Data type supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel
- *
- * @param[in] input Input tensor info to flatten, with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[in] output Output tensor info with shape [w*h*d, input_batches], where
- * w, h and d are the width, height and depth of the input tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLFloorKernel.h b/arm_compute/core/CL/kernels/CLFloorKernel.h
deleted file mode 100644
index 4d1ed789db..0000000000
--- a/arm_compute/core/CL/kernels/CLFloorKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFLOORKERNEL_H
-#define ARM_COMPUTE_CLFLOORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a floor operation */
-class CLFloorKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFloorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFloorKernel(const CLFloorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFloorKernel &operator=(const CLFloorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFloorKernel(CLFloorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFloorKernel &operator=(CLFloorKernel &&) = default;
- /** Default destructor */
- ~CLFloorKernel() = default;
- /** Set the source and destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[out] output Destination tensor. Same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
-
- /** Set the source and destination of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[out] output Destination tensor. Same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel
- *
- * @param[in] input Source tensor info. Data type supported: F16/F32.
- * @param[in] output Destination tensor info. Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFLOORKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h
deleted file mode 100644
index 2fe6b223ca..0000000000
--- a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** OpenCL kernel to fuse the batch normalization node into a preceding convolution node */
-class CLFuseBatchNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFuseBatchNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFuseBatchNormalizationKernel(const CLFuseBatchNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFuseBatchNormalizationKernel &operator=(const CLFuseBatchNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFuseBatchNormalizationKernel(CLFuseBatchNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFuseBatchNormalizationKernel &operator=(CLFuseBatchNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLFuseBatchNormalizationKernel() = default;
- /** Set the source and destination of the kernel
- *
- * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
- * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- */
- void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
- /** Set the source and destination of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
- * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel
- *
- * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights
- * @param[in] fused_weights Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[in] fused_bias Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input_weights;
- const ICLTensor *_input_bias;
- const ICLTensor *_bn_mean;
- const ICLTensor *_bn_var;
- const ICLTensor *_bn_gamma;
- const ICLTensor *_bn_beta;
- ICLTensor *_fused_weights;
- ICLTensor *_fused_bias;
- float _epsilon;
- bool _run_in_place_weights;
- bool _run_in_place_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
deleted file mode 100644
index 15fd20842e..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type */
-class CLGEMMLowpMatrixMultiplyNativeKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyNativeKernel(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyNativeKernel &operator=(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyNativeKernel(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyNativeKernel &operator=(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel
- *
- * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0
- * @param[in] output Output tensor info. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H*/
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
deleted file mode 100644
index 43526b7c41..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped
- *
- * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- */
-class CLGEMMLowpMatrixMultiplyReshapedKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyReshapedKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedKernel(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedKernel(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel
- *
- * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in] input1 Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[in] output Output tensor info. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_output_as_3d;
- unsigned int _k;
- bool _use_dummy_work_items;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H*/
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
deleted file mode 100644
index 1aba6c0398..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type when only the input matrix RHS (input1) has been reshaped
- *
- * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel
- * @note For fused output stage, only GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT type is supported
- */
-class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
- * Only the following values are supported for LHS info:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * Only the following values are supported for RHS info:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
- * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
- * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of shifts must be equal to the number of filters (OFM).
- * Supported data types: S32.
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
- const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
- * Only the following values are supported for LHS info:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * Only the following values are supported for RHS info:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
- * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
- * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of shifts must be equal to the number of filters (OFM).
- * Supported data types: S32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
- const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
- *
- * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
- * Only the following values are supported for LHS info:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * Only the following values are supported for RHS info:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] vector_sum_col (Optional) Input row-vector info of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
- * @param[in] vector_sum_row (Optional) Input row-vector info of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
- * @param[in] bias (Optional) Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] output_multipliers (Optional) Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- * @param[in] output_shifts (Optional) Output shifts tensor info. In case of per-channel quantization, the number of shifts must be equal to the number of filters (OFM).
- * Supported data types: S32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMKernelInfo &gemm_info, const ITensorInfo *vector_sum_col = nullptr,
- const ITensorInfo *vector_sum_row = nullptr, const ITensorInfo *bias = nullptr, const ITensorInfo *output_multipliers = nullptr,
- const ITensorInfo *output_shifts = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- const ICLTensor *_vector_sum_col;
- const ICLTensor *_vector_sum_row;
- const ICLTensor *_bias;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
- bool _is_quantized_per_channel;
- bool _fuse_output_stage;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H */ \ No newline at end of file
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
deleted file mode 100644
index bc982c6120..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to add the offset contribution after the matrix multiplication. The computation is performed in-place
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication),
- * and adds to it the offset contribution of matrix A and matrix B in-place.
- *
- * The final result is:
- *
- * mm_result[i][k] = mm_result[i][k] +
- * (vector_sum_col[k] * a_offset) +
- * (vector_sum_row[i] * b_offset) +
- * (a_offset * b_offset * k)
- *
- */
-class CLGEMMLowpOffsetContributionKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpOffsetContributionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionKernel(const CLGEMMLowpOffsetContributionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionKernel &operator=(const CLGEMMLowpOffsetContributionKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionKernel(CLGEMMLowpOffsetContributionKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] k Number of matrix A columns or matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- */
- void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] k Number of matrix A columns or matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset,
- int32_t b_offset);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
- *
- * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, int32_t a_offset, int32_t b_offset);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_vector_sum_col;
- const ICLTensor *_vector_sum_row;
- ICLTensor *_mm_result;
- const ICLTensor *_bias;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
deleted file mode 100644
index 583b388d45..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage.
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution
- * of matrix A and matrix B and performs the output stage defined by the output_stage argument
- *
- * @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo.
- */
-class CLGEMMLowpOffsetContributionOutputStageKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpOffsetContributionOutputStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionOutputStageKernel(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionOutputStageKernel &operator=(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionOutputStageKernel(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] k Number of matrix A columns or matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
- * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- */
- void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
- const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- *                                 Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in]  k                  Number of matrix A columns or matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
- * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k,
- int32_t a_offset, int32_t b_offset,
- const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionOutputStageKernel
- *
- * @param[in] mm_result          Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
- * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
- * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset,
- int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_mm_result;
- const ICLTensor *_vector_sum_col;
- const ICLTensor *_vector_sum_row;
- const ICLTensor *_bias;
- ICLTensor *_output;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _is_quantized_per_channel;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */
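
For reference, the arithmetic this kernel fuses can be written out in scalar form. The sketch below follows the gemmlowp low-precision scheme these kernels implement; the function and variable names are illustrative, not part of the library, and the final requantization to QASYMM8/QASYMM8_SIGNED (the output stage proper) follows this step.

#include <cstdint>
#include <vector>

// Scalar model of the offset contribution applied on top of the raw S32
// GEMM accumulators; illustrative only.
std::vector<int32_t> offset_contribution(const std::vector<int32_t> &mm_result,      // M x N accumulators, row-major
                                         const std::vector<int32_t> &vector_sum_col, // N column sums of matrix B
                                         const std::vector<int32_t> &vector_sum_row, // M row sums of matrix A
                                         const std::vector<int32_t> &bias,           // N shared biases, may be empty
                                         int M, int N, int32_t k, int32_t a_offset, int32_t b_offset)
{
    std::vector<int32_t> out(mm_result);
    for(int i = 0; i < M; ++i)
    {
        for(int j = 0; j < N; ++j)
        {
            int32_t acc = out[i * N + j];
            acc += a_offset * vector_sum_col[j]; // contribution of matrix A's offset
            acc += b_offset * vector_sum_row[i]; // contribution of matrix B's offset
            acc += a_offset * b_offset * k;      // cross term
            if(!bias.empty())
            {
                acc += bias[j];
            }
            out[i * N + j] = acc; // the output stage then requantizes this value
        }
    }
    return out;
}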
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
deleted file mode 100644
index 1e9fde8376..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Multiply each entry of the input by the floating point result multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Requantize
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values:
- * - to the [0..255] range and cast to QASYMM8.
- * - to the [-128..127] range and cast to QASYMM8_SIGNED.
- */
-class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] info Output stage info. Used to pass the quantized output data type
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] info Output stage info. Used to pass the quantized output data type
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in]  output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] info Output stage info. Used to pass the quantized output data type
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */
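
Stripped of the tensor plumbing, the per-element math described above is a bias add, a floating point rescale, an offset add, and clamping. A minimal scalar sketch, assuming QASYMM8 output; the names are illustrative, and in the kernel the scale and offset travel inside the GEMMLowpOutputStageInfo.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Scalar sketch of the float requantization path; illustrative only.
uint8_t quantize_down_scale_by_float(int32_t acc, int32_t bias, float scale,
                                     int32_t offset, int32_t min_bound, int32_t max_bound)
{
    acc += bias;                                                           // optional bias addition
    int32_t res = static_cast<int32_t>(std::lround(acc * scale)) + offset; // rescale, add offset
    res = std::max(min_bound, std::min(max_bound, res));                   // user-specified bounds
    res = std::max(0, std::min(255, res));                                 // QASYMM8 range
    return static_cast<uint8_t>(res);
}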
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
deleted file mode 100644
index 766ef9a820..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values:
- *    - to the [0..255] range and cast to QASYMM8.
- *    - to the [-128..127] range and cast to QASYMM8_SIGNED.
- *
- */
-class CLGEMMLowpQuantizeDownInt32ScaleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleKernel(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleKernel(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] output_stage GEMMLowp output stage metadata.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] output_stage GEMMLowp output stage metadata.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in]  output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] output_stage GEMMLowp output stage metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
- const GEMMLowpOutputStageInfo *_output_stage;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ \ No newline at end of file
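
The kernel above is the integer-only counterpart: the floating point rescale becomes an integer multiply followed by a right shift. A scalar sketch following the documented step order, assuming QASYMM8_SIGNED output; the parameter names are taken from the comment, and the packaging inside GEMMLowpOutputStageInfo is abstracted away.

#include <algorithm>
#include <cstdint>

// Scalar sketch of the integer scale output stage; illustrative only.
int8_t quantize_down_scale(int32_t acc, int32_t bias, int32_t result_offset,
                           int32_t result_mult_int, int32_t result_shift)
{
    int32_t res = (acc + result_offset) * result_mult_int; // add offset terms, multiply
    res += bias;                                           // optional bias addition
    res >>= result_shift;                                  // divide by a power of two
    res = std::max(-128, std::min(127, res));              // QASYMM8_SIGNED range
    return static_cast<int8_t>(res);
}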
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
deleted file mode 100644
index 6f58150037..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** CL kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QSYMM16 value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication of each entry of the input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16.
- *
- */
-class CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QSYMM16
- * @param[in]  result_fixedpoint_multiplier Fixed point value each element of the input matrix is multiplied by once the result_offset has been added
- * @param[in]  result_shift                 Integer value used in the rounding division by a power of two, applied to the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QSYMM16
- * @param[in]  result_fixedpoint_multiplier Fixed point value each element of the input matrix is multiplied by once the result_offset has been added
- * @param[in]  result_shift                 Integer value used in the rounding division by a power of two, applied to the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor info. Data type supported: S32
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor info. Data type supported: QSYMM16
- * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
- * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */
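
The "fixed point multiplication" and "round to nearest division by a power-of-two" steps named above are the two primitives of gemmlowp-style requantization (the gemmlowp document is linked from the reduction kernel headers below). A scalar sketch modelled on gemmlowp's reference implementations, assuming QSYMM16 output:

#include <algorithm>
#include <cstdint>

// gemmlowp-style fixed point multiply: returns the high 32 bits of the
// doubled 64-bit product, with rounding and saturation.
int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b)
{
    if(a == INT32_MIN && b == INT32_MIN)
    {
        return INT32_MAX; // the only case where the doubled product overflows
    }
    const int64_t ab    = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t nudge = (ab >= 0) ? (1 << 30) : (1 - (1 << 30));
    return static_cast<int32_t>((ab + nudge) / (1ll << 31));
}

// Round-to-nearest division by 2^exponent, ties rounded away from zero.
int32_t rounding_divide_by_pot(int32_t x, int exponent)
{
    const int32_t mask      = (1 << exponent) - 1;
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
    return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Full per-element pipeline for the QSYMM16 kernel (bias addition omitted).
int16_t quantize_down_to_qsymm16(int32_t acc, int32_t multiplier, int shift)
{
    int32_t res = saturating_rounding_doubling_high_mul(acc, multiplier);
    res         = rounding_divide_by_pot(res, shift);
    return static_cast<int16_t>(std::max(-32768, std::min(32767, res)));
}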
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
deleted file mode 100644
index 0c237be34c..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication of each entry of the input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
- */
-class CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in]  result_fixedpoint_multiplier Fixed point value each element of the input matrix is multiplied by once the result_offset has been added
- * @param[in]  result_shift                 Integer value used in the rounding division by a power of two, applied to the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0.
- * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0.
- *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in]  result_fixedpoint_multiplier Fixed point value each element of the input matrix is multiplied by once the result_offset has been added
- * @param[in]  result_shift                 Integer value used in the rounding division by a power of two, applied to the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0.
- * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0.
- *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in]  output Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in]  min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED.
- * @param[in]  max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
- *                    Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */
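
A hypothetical call sequence for this signed 8-bit variant, as the API stood before this removal, shown only to illustrate the validate-then-configure pattern these kernel headers share; the tensor objects and the quantization constants are placeholders, not values prescribed by the library.

#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

// acc: S32 accumulators, bias: S32 [OFM], dst: QASYMM8_SIGNED; all assumed
// to be initialised and allocated by the caller. The kernel object must
// outlive its enqueue, hence it is passed in rather than created locally.
void configure_output_stage(CLTensor &acc, CLTensor &bias, CLTensor &dst,
                            CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &kernel)
{
    // Check the tensor infos first; validate() returns a Status.
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(
        acc.info(), bias.info(), dst.info(), /* min */ -128, /* max */ 127));

    // Placeholder quantization constants; real values derive from the requantization scale.
    kernel.configure(&acc, &bias, &dst,
                     /* result_fixedpoint_multiplier */ 1395715588,
                     /* result_shift */ 10,
                     /* result_offset_after_shift */ -5,
                     /* min */ -128, /* max */ 127);
}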
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
deleted file mode 100644
index cb3e12e34d..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8 value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication of each entry of the input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
- */
-class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QASYMM8
- * @param[in]  result_fixedpoint_multiplier Fixed point value each element of the input matrix is multiplied by once the result_offset has been added
- * @param[in]  result_shift                 Integer value used in the rounding division by a power of two, applied to the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8.
- * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
- *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QASYMM8
- * @param[in]  result_fixedpoint_multiplier Fixed point value each element of the input matrix is multiplied by once the result_offset has been added
- * @param[in]  result_shift                 Integer value used in the rounding division by a power of two, applied to the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8.
- * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
- *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in]  output Output tensor. Data type supported: QASYMM8
- * @param[in]  min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8.
- * @param[in]  max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
- *                    Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */
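
All of the fixed point variants consume a (multiplier, shift) pair rather than a float scale. Deriving that pair is outside the scope of these headers; the helper below is an illustrative recipe in the spirit of gemmlowp's QuantizeMultiplierSmallerThanOne, assuming the real requantization scale lies in (0, 1). The kernels then apply the multiplier via the fixed point multiply and the shift via the rounding division sketched earlier.

#include <cmath>
#include <cstdint>

// Illustrative derivation of (result_fixedpoint_multiplier, result_shift)
// from a real scale; not part of the library API.
void quantize_multiplier_smaller_than_one(double real_multiplier,
                                          int32_t *quantized_multiplier, int *right_shift)
{
    int exponent   = 0;
    const double q = std::frexp(real_multiplier, &exponent); // real_multiplier = q * 2^exponent, q in [0.5, 1)
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    if(q_fixed == (1ll << 31)) // q rounded up to exactly 1.0
    {
        q_fixed /= 2;
        ++exponent;
    }
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    *right_shift          = -exponent; // non-negative for scales below 1
}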
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
deleted file mode 100644
index 857b1c7952..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPREDUCTIONKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPREDUCTIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-struct GEMMLowpReductionKernelInfo;
-
-/** Common interface for all OpenCL GEMMLowp reduction kernels */
-class ICLGEMMLowpReductionKernel : public ICLKernel
-{
-public:
- /** Constructor */
- ICLGEMMLowpReductionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete;
- /** Allow instances of this class to be moved */
- ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default;
- /** Allow instances of this class to be moved */
- ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S8
- * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S8
- * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
-
-protected:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-
-/** OpenCL kernel used to compute the row-vector of sums of all the entries in each row of Matrix A.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
-class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/** OpenCL kernel used to compute the row-vector of sums of all the entries in each column of Matrix B.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
-class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in]  mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in]  mtx_b           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
- *
- * @param[in] mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPREDUCTIONKERNEL_H */
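
In scalar terms the two reductions are plain row/column sums, optionally scaled, and they feed the a_offset/b_offset terms of the offset contribution kernel above. A sketch with plain arrays standing in for the tensor arguments:

#include <cstdint>
#include <vector>

// Row sums of an M x K matrix A (one S32 value per row), optionally
// multiplied by a scalar, as the kernel metadata describes.
std::vector<int32_t> matrix_a_reduction(const std::vector<uint8_t> &a, int M, int K,
                                        int32_t scalar = 1, bool mul_by_scalar = false)
{
    std::vector<int32_t> vector_sum_row(M, 0);
    for(int i = 0; i < M; ++i)
    {
        for(int k = 0; k < K; ++k)
        {
            vector_sum_row[i] += a[i * K + k]; // accumulate one row of A
        }
        if(mul_by_scalar)
        {
            vector_sum_row[i] *= scalar;
        }
    }
    return vector_sum_row;
}
// The matrix B reduction is symmetric: vector_sum_col[j] accumulates
// b[k * N + j] over k, one S32 value per column.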
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
deleted file mode 100644
index df2f6f4ad1..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface to add a bias to each row of the input tensor
- *
- */
-class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMMatrixAccumulateBiasesKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in, out] accum  The accumulate tensor to which the biases will be added. Data types supported: F16/F32
- * @param[in]      biases The shared biases tensor to append. It must be a 1D tensor. Data types supported: Same as @p accum
- */
- void configure(ICLTensor *accum, const ICLTensor *biases);
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] accum           The accumulate tensor to which the biases will be added. Data types supported: F16/F32
- * @param[in]      biases          The shared biases tensor to append. It must be a 1D tensor. Data types supported: Same as @p accum
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel
- *
- * @param[in] accum      The accumulate tensor to which the biases will be added. Data types supported: F16/F32
- * @param[in] biases     The shared biases tensor to append. It must be a 1D tensor. Data types supported: Same as @p accum
- * @param[in] gpu_target GPU target
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_accum;
- const ICLTensor *_biases;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H */
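
Behind the OpenCL plumbing, this kernel is a broadcast addition of a 1D bias onto every row of the accumulator. A scalar model, with a float vector standing in for the F16/F32 tensor:

#include <vector>

// Scalar model of the bias accumulation: each of the `cols` shared biases
// is added to the corresponding element of every row of `accum`.
void accumulate_biases(std::vector<float> &accum, const std::vector<float> &biases,
                       int rows, int cols) // accum is rows x cols, biases has cols entries
{
    for(int i = 0; i < rows; ++i)
    {
        for(int j = 0; j < cols; ++j)
        {
            accum[i * cols + j] += biases[j];
        }
    }
}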
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
deleted file mode 100644
index 6085b34bcb..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply two input matrices "A" and "B" and add a matrix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result.
- * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object
- *
- * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel,
- * the flag @p is_interleaved_transposed must be set to true
- *
- * @attention @p input1 tensor must have at least 2 dimensions (matrix)
- *
- */
-class CLGEMMMatrixMultiplyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
- * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in]  beta                      (Optional) Weight of matrix C. Default value is 0. Only beta = 1 is currently supported when matrix C is passed.
- * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- *
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
- bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
- * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in]  beta                      (Optional) Weight of matrix C. Default value is 0. Only beta = 1 is currently supported when matrix C is passed.
- * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
- bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the Matrix B info. Data type supported: same as @p input0
- * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0
- * @param[in] output                    Output tensor info to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha                     Weight of the matrix product
- * @param[in] beta                      Weight of matrix C. Only beta = 1 is currently supported when matrix C is passed.
- * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- * @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- * @param[in] gpu_target GPU Target
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta,
- bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _add_bias;
- bool _broadcast_bias;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */
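For orientation, the call sequence this interface expects is validate, then configure, then enqueue. Below is a minimal sketch of the non-reshaped path (is_interleaved_transposed = false), assuming the library's CLScheduler/CLTensor runtime helpers; the shapes and the main() wrapper are illustrative, not prescribed by the header:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // context/queue for the default OpenCL device

    const int M = 64, N = 32, K = 128;

    // ACL shapes are (width, height) = (columns, rows): A is MxK, B is KxN, dst is MxN
    CLTensor a, b, dst;
    a.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));

    // Check the configuration first: no bias (nullptr), alpha = 1, beta = 0, no reshaping
    const Status s = CLGEMMMatrixMultiplyKernel::validate(a.info(), b.info(), nullptr, dst.info(),
                                                          1.f, 0.f, false, GEMMReshapeInfo(M, N, K),
                                                          CLScheduler::get().target());
    if(!bool(s))
    {
        return 1; // configuration not supported on this target
    }

    CLGEMMMatrixMultiplyKernel mm;
    mm.configure(&a, &b, nullptr, &dst, 1.f, 0.f, false, GEMMReshapeInfo(M, N, K));

    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();
    // ... map/fill a and b here ...

    CLScheduler::get().enqueue(mm); // runs over the window set by configure()
    CLScheduler::get().sync();
    return 0;
}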
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
deleted file mode 100644
index c711a3d1f9..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when neither of the input matrices have been reshaped */
-class CLGEMMMatrixMultiplyNativeKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMMatrixMultiplyNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyNativeKernel(const CLGEMMMatrixMultiplyNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyNativeKernel &operator=(const CLGEMMMatrixMultiplyNativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyNativeKernel(CLGEMMMatrixMultiplyNativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyNativeKernel &operator=(CLGEMMMatrixMultiplyNativeKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in]  input0    Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in]  input1    Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- *                       rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in]  input0          Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in]  input1          Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- *                             rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
- const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel
- *
- * @param[in]  input0    Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in]  input1    Input tensor info for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- *                       rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
- bool _add_bias;
- bool _broadcast_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H*/
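The lhs_info/rhs_info tile parameters above are plain struct fields. A sketch of driving the native kernel under the documented constraints, assuming the CLScheduler/CLTensor runtime helpers; tile sizes and shapes are illustrative and the bias is omitted:

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    const unsigned int M = 64, N = 32, K = 128;

    CLTensor lhs, rhs, dst;
    lhs.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
    rhs.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = 4;           // rows per work-item: 1..8
    lhs_info.k0 = 4;           // accumulations per iteration: 2,3,4,8,16
    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0 = 4;           // columns per work-item: 2,3,4,8,16
    rhs_info.k0 = lhs_info.k0; // must match lhs_info.k0

    GEMMKernelInfo gemm_info; // carries the original (unreshaped) dimensions
    gemm_info.m = M;
    gemm_info.n = N;
    gemm_info.k = K;

    if(!bool(CLGEMMMatrixMultiplyNativeKernel::validate(lhs.info(), rhs.info(), nullptr, dst.info(),
                                                        1.f, 0.f, lhs_info, rhs_info, gemm_info)))
    {
        return 1;
    }

    CLGEMMMatrixMultiplyNativeKernel gemm;
    gemm.configure(&lhs, &rhs, nullptr, &dst, 1.f, 0.f, lhs_info, rhs_info, gemm_info);

    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    dst.allocator()->allocate();
    CLScheduler::get().enqueue(gemm);
    CLScheduler::get().sync();
    return 0;
}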
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
deleted file mode 100644
index ee8e57fa8c..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped
- *
- * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- */
-class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMMatrixMultiplyReshapedKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
- * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
- * multiplications, i.e. float c = (half)a * (half)b
- *
- * @param[in]  input0    Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4
- * @param[in]  input1    Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
- * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
- * multiplications, i.e. float c = (half)a * (half)b
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in]  input0          Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4
- * @param[in]  input1          Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
- const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
- *
- * @param[in]  input0    Input tensor info containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4
- * @param[in]  input1    Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3
- * @param[in]  input2    Input tensor info containing the bias matrix. Data type supported: same as @p input0.
- * @param[in]  output    Output tensor info to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_output_as_3d;
- unsigned int _k;
- bool _use_dummy_work_items;
- bool _add_bias;
- bool _broadcast_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/
\ No newline at end of file
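Because this kernel consumes pre-reshaped operands, it is normally run as the last stage of a three-kernel pipeline. A sketch of that pipeline under the documented constraints (lhs_info.transpose = false, rhs_info.transpose = true, equal k0); the compute_*_reshaped_shape helpers are assumed from arm_compute/core/utils/misc/ShapeCalculator.h, and all sizes are illustrative:

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    const unsigned int M = 64, N = 32, K = 128;

    CLTensor lhs, rhs, lhs_reshaped, rhs_reshaped, dst;
    lhs.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
    rhs.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = 4; lhs_info.k0 = 8; lhs_info.v0 = 2;
    lhs_info.transpose = false; lhs_info.interleave = true;
    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0 = 4; rhs_info.k0 = 8; rhs_info.h0 = 2; // k0 equals lhs_info.k0
    rhs_info.transpose = true; rhs_info.interleave = true;

    // Derive the reshaped shapes (assumed helpers from the library's shape calculator)
    lhs_reshaped.allocator()->init(TensorInfo(misc::shape_calculator::compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, DataType::F32));
    rhs_reshaped.allocator()->init(TensorInfo(misc::shape_calculator::compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, DataType::F32));

    GEMMKernelInfo gemm_info;
    gemm_info.m = M; gemm_info.n = N; gemm_info.k = K;

    CLGEMMReshapeLHSMatrixKernel        reshape_lhs;
    CLGEMMReshapeRHSMatrixKernel        reshape_rhs;
    CLGEMMMatrixMultiplyReshapedKernel  gemm;
    reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
    reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
    gemm.configure(&lhs_reshaped, &rhs_reshaped, nullptr, &dst, 1.f, 0.f, lhs_info, rhs_info, gemm_info);

    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    lhs_reshaped.allocator()->allocate();
    rhs_reshaped.allocator()->allocate();
    dst.allocator()->allocate();

    CLScheduler::get().enqueue(reshape_lhs, false);
    CLScheduler::get().enqueue(reshape_rhs, false);
    CLScheduler::get().enqueue(gemm);
    CLScheduler::get().sync();
    return 0;
}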
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
deleted file mode 100644
index f7d314a039..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when only the input matrix RHS (input1) has been reshaped
- *
- * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel
- */
-class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in]  input0    Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in]  input1    Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.transpose: true,false
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in]  input0          Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in]  input1          Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.transpose: true,false
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
- const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- *
- * @param[in]  input0    Input tensor info for the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
- * @param[in]  input1    Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
- * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.transpose: true,false
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
- bool _add_bias;
- bool _broadcast_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/
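This variant skips the LHS reshape, so only one preparation kernel is needed before the multiply. A sketch, assuming the CLScheduler/CLTensor helpers and the compute_rhs_reshaped_shape helper from the library's shape calculator; sizes are illustrative:

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    const unsigned int M = 64, N = 32, K = 128;

    CLTensor lhs, rhs, rhs_reshaped, dst;
    lhs.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
    rhs.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = 4;           // rows per work-item; the LHS itself is consumed as-is
    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0 = 4; rhs_info.k0 = 4; rhs_info.h0 = 4;
    rhs_info.transpose = true; rhs_info.interleave = true;
    lhs_info.k0 = rhs_info.k0; // kept consistent with the RHS tiling

    rhs_reshaped.allocator()->init(TensorInfo(misc::shape_calculator::compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, DataType::F32));

    GEMMKernelInfo gemm_info;
    gemm_info.m = M; gemm_info.n = N; gemm_info.k = K;

    CLGEMMReshapeRHSMatrixKernel              reshape_rhs;
    CLGEMMMatrixMultiplyReshapedOnlyRHSKernel gemm;
    reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
    gemm.configure(&lhs, &rhs_reshaped, nullptr, &dst, 1.f, 0.f, lhs_info, rhs_info, gemm_info);

    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    rhs_reshaped.allocator()->allocate();
    dst.allocator()->allocate();

    CLScheduler::get().enqueue(reshape_rhs, false);
    CLScheduler::get().enqueue(gemm);
    CLScheduler::get().sync();
    return 0;
}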
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
deleted file mode 100644
index 6d70b4b0c2..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the GEMM matrix vector multiply kernel. */
-class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMMatrixVectorMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in]  input1 The 2D reshaped weights tensor. Data type supported: Same as @p input0.
- * @param[out] output The output 2D tensor. Data types supported: Same as @p input0, S32 for QASYMM8/QASYMM8_SIGNED.
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in]  input1          The 2D reshaped weights tensor. Data type supported: Same as @p input0.
- * @param[out] output          The output 2D tensor. Data types supported: Same as @p input0, S32 for QASYMM8/QASYMM8_SIGNED.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel
- *
- * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input0.
- * @param[in] output The output 2D tensor info. Data types supported: Same as @p input0, S32 for QASYMM8/QASYMM8_SIGNED.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- int _num_rows_read_per_iteration;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */
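Since the shapes this kernel accepts depend on how the upstream reshaping laid the data out, a safe way to use the interface is to probe a candidate configuration through validate() before allocating device memory. A sketch with placeholder shapes (purely illustrative, not the required layout):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"

using namespace arm_compute;

int main()
{
    // Placeholder shapes: input0 is the reshaped input, input1 the 2D reshaped weights
    const TensorInfo in0(TensorShape(9U, 16U, 4U), 1, DataType::F32);
    const TensorInfo in1(TensorShape(9U, 4U), 1, DataType::F32);
    const TensorInfo out(TensorShape(16U, 4U), 1, DataType::F32);

    // A host-side check only; no OpenCL buffers are touched
    const Status s = CLGEMMMatrixVectorMultiplyKernel::validate(&in0, &in1, &out);
    return bool(s) ? 0 : 1; // non-zero if this layout is rejected
}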
diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
deleted file mode 100644
index fe77fcb428..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
-#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to reshape the LHS matrix when performing the matrix multiplication.
- * In particular, this kernel splits the input matrix into blocks of size M0xK0 (defined through GEMMLHSMatrixInfo) and
- * stores each one in the output matrix, unrolling the values
- */
-class CLGEMMReshapeLHSMatrixKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMReshapeLHSMatrixKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeLHSMatrixKernel(const CLGEMMReshapeLHSMatrixKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeLHSMatrixKernel &operator=(const CLGEMMReshapeLHSMatrixKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeLHSMatrixKernel(CLGEMMReshapeLHSMatrixKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeLHSMatrixKernel &operator=(CLGEMMReshapeLHSMatrixKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.v0: greater than 0
- * lhs_info.transpose: true, false
- * lhs_info.interleave: true, false
- * @param[in]  reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as a 3D tensor
- */
- void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.v0: greater than 0
- * lhs_info.transpose: true, false
- * lhs_info.interleave: true, false
- * @param[in]  reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as a 3D tensor
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
- * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.v0: greater than 0
- * lhs_info.transpose: true, false
- * lhs_info.interleave: true, false
- * @param[in] reinterpret_input_as_3d True if the input has to be reinterpreted as a 3D tensor
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- bool _reinterpret_input_as_3d;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H */
\ No newline at end of file
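A sketch of running the LHS reshape on its own, assuming the CLScheduler/CLTensor helpers and the compute_lhs_reshaped_shape helper from the library's shape calculator; the matrix size and tile parameters are illustrative:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32)); // K = 128, M = 64

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = 4; lhs_info.k0 = 4; lhs_info.v0 = 2;
    lhs_info.transpose = false; lhs_info.interleave = true;

    // Output shape derived from the tiling (assumed shape-calculator helper)
    dst.allocator()->init(TensorInfo(misc::shape_calculator::compute_lhs_reshaped_shape(*src.info(), lhs_info), 1, DataType::F32));

    if(!bool(CLGEMMReshapeLHSMatrixKernel::validate(src.info(), dst.info(), lhs_info, false)))
    {
        return 1;
    }

    CLGEMMReshapeLHSMatrixKernel reshape;
    reshape.configure(&src, &dst, lhs_info, false /* reinterpret_input_as_3d */);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    CLScheduler::get().enqueue(reshape);
    CLScheduler::get().sync();
    return 0;
}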
diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
deleted file mode 100644
index 0e6352bdbb..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
-#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication.
- * In particular, this kernel splits the input matrix into blocks of size K0xN0 and stores each one in
- * the output matrix, unrolling the values */
-class CLGEMMReshapeRHSMatrixKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMReshapeRHSMatrixKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeRHSMatrixKernel(const CLGEMMReshapeRHSMatrixKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeRHSMatrixKernel &operator=(const CLGEMMReshapeRHSMatrixKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false)
- * rhs_info.h0: greater than 0
- * rhs_info.transpose: true, false
- * rhs_info.interleave: true, false
- */
- void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false)
- * rhs_info.h0: greater than 0
- * rhs_info.transpose: true, false
- * rhs_info.interleave: true, false
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
- * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false)
- * rhs_info.h0: greater than 0
- * rhs_info.transpose: true, false
- * rhs_info.interleave: true, false
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMRHSMatrixInfo &rhs_info);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H */
\ No newline at end of file
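The RHS reshape mirrors the LHS one, with the extra rule that k0 = 1 is only valid when rhs_info.transpose is false. A sketch under the same assumptions as above (CLScheduler/CLTensor helpers, assumed compute_rhs_reshaped_shape helper, illustrative sizes):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 128U), 1, DataType::F32)); // N = 32, K = 128

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0 = 4;
    rhs_info.k0 = 4; // k0 = 1 would require rhs_info.transpose = false
    rhs_info.h0 = 2;
    rhs_info.transpose  = true;
    rhs_info.interleave = true;

    dst.allocator()->init(TensorInfo(misc::shape_calculator::compute_rhs_reshaped_shape(*src.info(), rhs_info), 1, DataType::F32));

    if(!bool(CLGEMMReshapeRHSMatrixKernel::validate(src.info(), dst.info(), rhs_info)))
    {
        return 1;
    }

    CLGEMMReshapeRHSMatrixKernel reshape;
    reshape.configure(&src, &dst, rhs_info);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    CLScheduler::get().enqueue(reshape);
    CLScheduler::get().sync();
    return 0;
}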
diff --git a/arm_compute/core/CL/kernels/CLGatherKernel.h b/arm_compute/core/CL/kernels/CLGatherKernel.h
deleted file mode 100644
index b7539536e9..0000000000
--- a/arm_compute/core/CL/kernels/CLGatherKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGATHERKERNEL_H
-#define ARM_COMPUTE_CLGATHERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform a gather operation along a given axis */
-class CLGatherKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGatherKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGatherKernel(const CLGatherKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGatherKernel &operator=(const CLGatherKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGatherKernel(CLGatherKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGatherKernel &operator=(CLGatherKernel &&) = default;
- /** Default destructor */
- ~CLGatherKernel() = default;
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- */
- void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
- *
- * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in]  indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- const ICLTensor *_indices; /**< Indices tensor */
- ICLTensor *_output; /**< Destination tensor */
- int _axis; /**< Axis index */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGATHERKERNEL_H */
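A sketch of gathering three slices along axis 0 of a 2D tensor, assuming the CLScheduler/CLTensor helpers; shapes are illustrative. The output shape is the input shape with the gathered axis replaced by the number of indices:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, indices, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    indices.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::U32)); // 1D, values in [0, 8)
    dst.allocator()->init(TensorInfo(TensorShape(3U, 4U), 1, DataType::F32)); // axis 0 replaced by #indices

    if(!bool(CLGatherKernel::validate(src.info(), indices.info(), dst.info(), 0)))
    {
        return 1;
    }

    CLGatherKernel gather;
    gather.configure(&src, &indices, &dst, 0 /* axis */);

    src.allocator()->allocate();
    indices.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, and fill indices with values < 8 ...
    CLScheduler::get().enqueue(gather);
    CLScheduler::get().sync();
    return 0;
}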
diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
deleted file mode 100644
index 6a9d3eaa4d..0000000000
--- a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
-#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Gaussian 3x3 filter kernel.
- *
- */
-class CLGaussian3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */
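With border_undefined = true the kernel simply skips the one-pixel frame it cannot compute; with false, the caller is expected to have filled the input border beforehand (the runtime-level Gaussian function pairs this kernel with a border-fill kernel for that case). A minimal sketch of the undefined-border path, assuming the CLScheduler/CLTensor helpers; the image size is illustrative:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));

    CLGaussian3x3Kernel gauss;
    gauss.configure(&src, &dst, true /* border_undefined: leave the 1-pixel frame untouched */);
    // gauss.border_size() reports the apron the kernel reads around each output pixel

    src.allocator()->allocate();
    dst.allocator()->allocate();
    CLScheduler::get().enqueue(gauss);
    CLScheduler::get().sync();
    return 0;
}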
diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
deleted file mode 100644
index d8730e0c92..0000000000
--- a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
-#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
-
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */
-class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel
-{
-public:
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
-private:
- //Make the configure method of the parent class private
- using CLSeparableConvolution5x5HorKernel::configure;
-};
-
-/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */
-class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel
-{
-public:
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in]  input  Input tensor (output of the horizontal pass). Data types supported: S16.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input           Input tensor (output of the horizontal pass). Data types supported: S16.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
-private:
- // Make the configure method of the parent class private
- using CLSeparableConvolution5x5VertKernel::configure;
-};
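-
-/* Illustrative usage sketch, not part of the original header: the two passes
- * are chained through an intermediate S16 tensor. Tensor names and their
- * initialisation/allocation are assumptions.
- *
- * @code
- * CLGaussian5x5HorKernel  hor;
- * CLGaussian5x5VertKernel vert;
- * hor.configure(&src_u8, &tmp_s16, false);  // horizontal pass: U8 -> S16
- * vert.configure(&tmp_s16, &dst_u8, false); // vertical pass:   S16 -> U8
- * CLScheduler::get().enqueue(hor, false);
- * CLScheduler::get().enqueue(vert, true);
- * @endcode
- */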
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
deleted file mode 100644
index 34cd062dae..0000000000
--- a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
-#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */
-class CLGaussianPyramidHorKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLGaussianPyramidHorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default;
- /** Default destructor */
- ~CLGaussianPyramidHorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- int _l2_load_offset;
-};
-
-/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */
-class CLGaussianPyramidVertKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLGaussianPyramidVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default;
- /** Default destructor */
- ~CLGaussianPyramidVertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U16.
- * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U16.
- * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- int _t2_load_offset;
-};
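-
-/* Illustrative usage sketch with assumed tensor names: one pyramid level is
- * produced by halving the width in the horizontal pass and the height in the
- * vertical pass.
- *
- * @code
- * CLGaussianPyramidHorKernel  hor;
- * CLGaussianPyramidVertKernel vert;
- * hor.configure(&level_n, &tmp_u16);    // U8 -> U16, half width
- * vert.configure(&tmp_u16, &level_np1); // U16 -> U8, half height
- * @endcode
- */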
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h
deleted file mode 100644
index 46dc16d6d5..0000000000
--- a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
-#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for Compute All Anchors kernel */
-class CLComputeAllAnchorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLComputeAllAnchorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default;
- /** Default destructor */
- ~CLComputeAllAnchorsKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
- *
- * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_anchors;
- ICLTensor *_all_anchors;
-};
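-
-/* Illustrative usage sketch with assumed names; the ComputeAnchorsInfo
- * arguments (feature map width/height and spatial scale) are placeholders.
- * The kernel replicates anchors of shape (4, A) over the feature map,
- * producing (4, H*W*A).
- *
- * @code
- * const ComputeAnchorsInfo info(feat_width, feat_height, spatial_scale);
- * ARM_COMPUTE_ERROR_THROW_ON(CLComputeAllAnchorsKernel::validate(anchors.info(), all_anchors.info(), info));
- * CLComputeAllAnchorsKernel k;
- * k.configure(&anchors, &all_anchors, info);
- * @endcode
- */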
-} // namespace arm_compute
-#endif // ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
deleted file mode 100644
index 046950551d..0000000000
--- a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
-#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** OpenCL kernel to perform HOG Orientation Binning */
-class CLHOGOrientationBinningKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGOrientationBinningKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default;
- /** Default destructor */
- ~CLHOGOrientationBinningKernel() = default;
-
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input_magnitude;
- const ICLTensor *_input_phase;
- ICLTensor *_output;
- Size2D _cell_size;
-};
-
-/** OpenCL kernel to perform HOG block normalization */
-class CLHOGBlockNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGBlockNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLHOGBlockNormalizationKernel() = default;
-
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Size2D _num_cells_per_block_stride;
-};
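-
-/* Illustrative usage sketch with assumed names: the HOG descriptor is built
- * in two stages, orientation binning per cell followed by block
- * normalization, both driven by the same HOGInfo metadata.
- *
- * @code
- * CLHOGOrientationBinningKernel binning;
- * CLHOGBlockNormalizationKernel block_norm;
- * binning.configure(&mag_s16, &phase_u8, &cell_hog, hog.info());
- * block_norm.configure(&cell_hog, &descriptor, hog.info());
- * @endcode
- */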
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
deleted file mode 100644
index 681c212cc5..0000000000
--- a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H
-#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLHOG.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/OpenCL.h"
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform HOG detector kernel using linear SVM */
-class CLHOGDetectorKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGDetectorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default;
- /** Default destructor */
- ~CLHOGDetectorKernel() = default;
-
- /** Initialise the kernel's input, HOG data-object, detection windows array, the stride of the detection window, the threshold and the index of the class to detect
- *
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] num_detection_windows Number of detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be a multiple of hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
- uint16_t idx_class = 0);
- /** Initialise the kernel's input, HOG data-object, detection windows array, the stride of the detection window, the threshold and the index of the class to detect
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] num_detection_windows Number of detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be a multiple of hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
- const Size2D &detection_window_stride, float threshold = 0.0f,
- uint16_t idx_class = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLDetectionWindowArray *_detection_windows;
- cl::Buffer *_num_detection_windows;
-};
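-
-/* Illustrative usage sketch with assumed names: using the block stride as the
- * detection window stride satisfies the multiple-of-block-stride requirement;
- * threshold 1.0f and class index 0 are placeholders.
- *
- * @code
- * CLHOGDetectorKernel detector;
- * detector.configure(&descriptor, &hog, &windows, &num_windows,
- *                    hog.info()->block_stride(), 1.0f, 0);
- * @endcode
- */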
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
deleted file mode 100644
index a13119b82c..0000000000
--- a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
-#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the harris score kernel.
- *
- * @note The implementation supports 3, 5, and 7 for the block_size.
- */
-class CLHarrisScoreKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHarrisScoreKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default;
- /** Default destructor */
- ~CLHarrisScoreKernel() = default;
-
- /** Setup the kernel parameters
- *
- * @param[in] input1 Source image (gradient X). Data types supported: S16, S32. (Must be the same as input2)
- * @param[in] input2 Source image (gradient Y). Data types supported: S16, S32. (Must be the same as input1)
- * @param[out] output Destination image (Harris score). Data types supported: F32
- * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
- * @param[in] norm_factor Normalization factor to use according to the gradient size (must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined);
- /** Setup the kernel parameters
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source image (gradient X). Data types supported: S16, S32. (Must be the same as input2)
- * @param[in] input2 Source image (gradient Y). Data types supported: S16, S32. (Must be the same as input1)
- * @param[out] output Destination image (Harris score). Data types supported: F32
- * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
- * @param[in] norm_factor Normalization factor to use according to the gradient size (must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-protected:
- const ICLImage *_input1; /**< Source image - Gx component */
- const ICLImage *_input2; /**< Source image - Gy component */
- ICLImage *_output; /**< Destination image - Harris score */
- float _sensitivity; /**< Sensitivity value */
- float _strength_thresh; /**< Threshold value */
- float _norm_factor; /**< Normalization factor */
- BorderSize _border_size; /**< Border size */
-};
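-
-/* Illustrative usage sketch with assumed names: gradients from e.g. a 3x3
- * Sobel feed the score kernel; norm_factor and strength_thresh are
- * placeholder values chosen by the caller.
- *
- * @code
- * CLHarrisScoreKernel harris;
- * harris.configure(&gx_s16, &gy_s16, &score_f32,
- *                  3, norm_factor, strength_thresh, 0.04f, false);
- * @endcode
- */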
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
deleted file mode 100644
index 524e5ea997..0000000000
--- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the height concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLHeightConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHeightConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLHeightConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _height_offset;
- unsigned int _num_elems_processed_per_iteration;
-};
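-
-/* Illustrative usage sketch with assumed names: each input is written into
- * the output at its running offset along the Y axis, so concatenating two
- * tensors takes two kernels.
- *
- * @code
- * ARM_COMPUTE_ERROR_THROW_ON(CLHeightConcatenateLayerKernel::validate(in0.info(), 0, out.info()));
- * CLHeightConcatenateLayerKernel k0, k1;
- * k0.configure(&in0, 0, &out);
- * k1.configure(&in1, in0.info()->dimension(1), &out);
- * @endcode
- */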
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h
deleted file mode 100644
index 9cd374711b..0000000000
--- a/arm_compute/core/CL/kernels/CLHistogramKernel.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H
-#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLDistribution1D;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface to run the histogram kernel. This kernel processes the part of the image whose width is a multiple of 16.
- * If the image width is not a multiple of 16, the remaining pixels have to be processed with the @ref CLHistogramBorderKernel
- */
-class CLHistogramKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLHistogramKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramKernel(const CLHistogramKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramKernel &operator=(const CLHistogramKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHistogramKernel(CLHistogramKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHistogramKernel &operator=(CLHistogramKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const ICLImage *input, ICLDistribution1D *output);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input;
- ICLDistribution1D *_output;
-};
-
-/** Interface to run the histogram kernel to handle the leftover part of the image
- *
- */
-class CLHistogramBorderKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLHistogramBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const ICLImage *input, ICLDistribution1D *output);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input;
- ICLDistribution1D *_output;
-};
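-
-/* Illustrative usage sketch with assumed names: the main kernel covers the
- * image width that is a multiple of 16, the border kernel the leftover
- * columns; both accumulate into the same distribution.
- *
- * @code
- * CLHistogramKernel       hist;
- * CLHistogramBorderKernel hist_border;
- * hist.configure(&image_u8, &dist);
- * hist_border.configure(&image_u8, &dist);
- * @endcode
- */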
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
deleted file mode 100644
index 61f2a3d489..0000000000
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H
-#define ARM_COMPUTE_CLIM2COLKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the im2col reshape kernel.
- *
- * Rearranges image blocks into columns, stripping each convolution block out into a single column.
- * This transforms a convolution into a plain matrix multiplication.
- *
- * For example, taking the image below and assuming 3x3 image blocks with a stride of 1, we have:
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * =
- * \left( \begin{array}{ccccccccc}
- * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
- * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
- * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
- * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- */
-class CLIm2ColKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLIm2ColKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIm2ColKernel(const CLIm2ColKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLIm2ColKernel(CLIm2ColKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * This is valid only for non-quantized inputs.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
- * Number of groups other than 1 is only supported for NCHW data layout.
- * The number of channels must be a multiple of the number of groups.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias,
- const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * This is valid only for non-quantized inputs.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
- * Number of groups other than 1 is only supported for NCHW data layout.
- * The number of channels must be a multiple of the number of groups.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- DataLayout _data_layout;
- std::pair<unsigned int, unsigned int> _convolved_dims;
- unsigned int _num_elems_processed_per_iteration;
- Size2D _kernel_dims;
- PadStrideInfo _conv_info;
- unsigned int _num_groups;
-};
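-
-/* Illustrative usage sketch with assumed names: a 3x3 kernel with stride 1,
- * no padding and no bias, matching the matrix example above.
- *
- * @code
- * CLIm2ColKernel im2col;
- * im2col.configure(&input, &output, Size2D(3U, 3U),
- *                  PadStrideInfo(1, 1, 0, 0), false);
- * @endcode
- */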
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLIM2COLKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
deleted file mode 100644
index 014dce1759..0000000000
--- a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for performing an instance normalization */
-class CLInstanceNormalizationLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLInstanceNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLInstanceNormalizationLayerKernel(const CLInstanceNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLInstanceNormalizationLayerKernel &operator=(const CLInstanceNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLInstanceNormalizationLayerKernel(CLInstanceNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLInstanceNormalizationLayerKernel &operator=(CLInstanceNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLInstanceNormalizationLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
- * If @p output is nullptr, this tensor will store the result of the normalization.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- */
- void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
- * If @p output is nullptr, this tensor will store the result of the normalization.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
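-
-/* Illustrative usage sketch; the InstanceNormalizationLayerKernelInfo field
- * order (gamma, beta, epsilon, use_mixed_precision) is assumed. Passing
- * nullptr as the output runs the normalization in place.
- *
- * @code
- * InstanceNormalizationLayerKernelInfo info(1.0f, 0.0f, 1e-12f, true);
- * CLInstanceNormalizationLayerKernel norm;
- * norm.configure(&tensor, nullptr, info); // in-place
- * @endcode
- */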
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
deleted file mode 100644
index 6b6076a917..0000000000
--- a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
-#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to run the horizontal pass of the integral image kernel. */
-class CLIntegralImageHorKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-};
-
-/** Interface to run the vertical pass of the integral image kernel. */
-class CLIntegralImageVertKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLIntegralImageVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in,out] in_out The input/output tensor. Data types supported: U32
- */
- void configure(ICLTensor *in_out);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] in_out The input/output tensor. Data types supported: U32
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *in_out);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_in_out;
-};
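-
-/* Illustrative usage sketch with assumed names: the horizontal pass writes
- * U32 row prefix sums, the vertical pass then accumulates them in place.
- *
- * @code
- * CLIntegralImageHorKernel  hor;
- * CLIntegralImageVertKernel vert;
- * hor.configure(&src_u8, &integral_u32);
- * vert.configure(&integral_u32);
- * @endcode
- */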
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
deleted file mode 100644
index 169910b70d..0000000000
--- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
-#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for performing an L2 normalization along a given axis, given the square sum along that axis */
-class CLL2NormalizeLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLL2NormalizeLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default;
- /** Default destructor */
- ~CLL2NormalizeLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] sum Sum values tensor. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis: 2
- * @param[in] epsilon Lower bound value for the normalization.
- */
- void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] sum Sum values tensor. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis: 2
- * @param[in] epsilon Lower bound value for the normalization.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] sum Sum values tensor info. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis: 2
- * @param[in] epsilon Lower bound value for the normalization.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_sum;
- ICLTensor *_output;
- unsigned int _actual_axis;
- float _epsilon;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H */
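
For context, a minimal host-side sketch of how this kernel was typically driven before the header left the public include tree. The wrapper function, shapes, axis and epsilon are illustrative, and `sum` is assumed to already hold the sums of squares produced by a prior reduction:

#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void l2_normalize_sketch()
{
    CLScheduler::get().default_init();

    // Normalize 16 rows of 128 elements each along axis 0.
    CLTensor input, sum, output;
    input.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));
    sum.allocator()->init(TensorInfo(TensorShape(1U, 16U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));

    CLL2NormalizeLayerKernel kernel;
    kernel.configure(&input, &sum, &output, 0 /* axis */, 1e-12f /* epsilon */);

    input.allocator()->allocate();
    sum.allocator()->allocate();
    output.allocator()->allocate();

    CLScheduler::get().enqueue(kernel);
}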
diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
deleted file mode 100644
index f94602c381..0000000000
--- a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLLKTRACKERKERNEL_H
-#define ARM_COMPUTE_CLLKTRACKERKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Internal keypoint structure for Lucas-Kanade Optical Flow */
-struct CLLKInternalKeypoint
-{
- float x{ 0.f }; /**< x coordinate of the keypoint */
- float y{ 0.f }; /**< y coordinate of the keypoint */
- float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
- float dummy{ 0.f }; /**< Dummy field to keep the data structure 128-bit aligned, so that the GPU can use vload4 */
-};
-
-/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
-struct CLCoefficientTable
-{
- float A11; /**< iA11 * FLT_SCALE */
- float A12; /**< iA12 * FLT_SCALE */
- float A22; /**< iA22 * FLT_SCALE */
- float min_eig; /**< Minimum eigenvalue */
-};
-
-/** Structure for storing ival, ixval and iyval for each point inside the window */
-struct CLOldValue
-{
- int16_t ival; /**< ival extracted from the old image */
- int16_t ixval; /**< ixval extracted from the Scharr Gx image */
- int16_t iyval; /**< iyval extracted from the Scharr Gy image */
- int16_t dummy; /**< Dummy field to keep the data structure 128-bit aligned, so that the GPU can use vload4 */
-};
-
-/** Interface for OpenCL Array of Internal Key Points. */
-using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
-/** Interface for OpenCL Array of Coefficient Tables. */
-using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
-/** Interface for OpenCL Array of Old Values. */
-using ICLOldValArray = ICLArray<CLOldValue>;
-
-/** Interface to run the initialization step of LKTracker */
-class CLLKTrackerInitKernel : public ICLKernel
-{
-public:
- /** Initialise the kernel input and output
- *
- * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
- * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
- * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] level The pyramid level
- * @param[in] num_levels The number of pyramid levels
- * @param[in] pyramid_scale Scale factor used for generating the pyramid
- */
- void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
- * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
- * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] level The pyramid level
- * @param[in] num_levels The number of pyramid levels
- * @param[in] pyramid_scale Scale factor used for generating the pyramid
- */
- void configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/** Interface to run the finalize step of LKTracker, which truncates the coordinates stored in the new_points array */
-class CLLKTrackerFinalizeKernel : public ICLKernel
-{
-public:
- /** Initialise the kernel input and output
- *
- * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
- */
- void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
- */
- void configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */
-class CLLKTrackerStage0Kernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLKTrackerStage0Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default;
- /** Initialise the kernel input and output
- *
- * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
- * @param[in] old_scharr_gx Pointer to the input Scharr X tensor. Data types supported: S16
- * @param[in] old_scharr_gy Pointer to the input Scharr Y tensor. Data types supported: S16
- * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
- * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[out] old_ival Pointer to the array holding internal values
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- size_t window_dimension, size_t level);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
- * @param[in] old_scharr_gx Pointer to the input Scharr X tensor. Data types supported: S16
- * @param[in] old_scharr_gy Pointer to the input Scharr Y tensor. Data types supported: S16
- * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
- * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[out] old_ival Pointer to the array holding internal values
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- size_t window_dimension, size_t level);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_old_input;
- const ICLTensor *_old_scharr_gx;
- const ICLTensor *_old_scharr_gy;
-};
-
-/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */
-class CLLKTrackerStage1Kernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLKTrackerStage1Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default;
- /** Initialise the kernel input and output
- *
- * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
- * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[in] old_ival Pointer to the array holding internal values
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
- * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[in] old_ival Pointer to the array holding internal values
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_new_input;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLLKTRACKERKERNEL_H */
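
To make the staged data flow concrete, here is a hedged sketch of configuring the init kernel. An initialized CLScheduler is assumed; the wrapper, the pyramid parameters and the use of CLArray for the internal scratch arrays are illustrative (the stage0, stage1 and finalize kernels would then be configured and enqueued per pyramid level in the same fashion):

#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

void lk_init_sketch(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates)
{
    const size_t num_keypoints = old_points->max_num_values();

    // Internal scratch arrays, one entry per tracked keypoint.
    CLArray<CLLKInternalKeypoint> old_points_internal(num_keypoints);
    CLArray<CLLKInternalKeypoint> new_points_internal(num_keypoints);

    CLLKTrackerInitKernel init_kernel;
    init_kernel.configure(old_points, new_points_estimates,
                          &old_points_internal, &new_points_internal,
                          true /* use_initial_estimate */,
                          0 /* level */, 3 /* num_levels */, 0.5f /* pyramid_scale */);

    CLScheduler::get().enqueue(init_kernel);
}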
diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
deleted file mode 100644
index e68160f96d..0000000000
--- a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply each row of the first tensor with the lowest 2 dimensions of the second tensor.
- *
- * @attention The second input tensor must have at least 2 dimensions (matrix)
- *
- */
-class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLocallyConnectedMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] input0 First input tensor. Data types supported: F32
- * @param[in] input1 Second input tensor. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 First input tensor. Data types supported: F32
- * @param[in] input1 Second input tensor. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel
- *
- * @param[in] input0 First input tensor info. Data types supported: F32
- * @param[in] input1 Second input tensor info. Data type supported: same as @p input0
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */
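
A short sketch of the validate-then-configure pattern this header exposes. The wrapper is illustrative; the caller is assumed to have initialized F32 tensors of compatible shapes, which validate() checks before any configuration happens:

#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void locally_connected_mm_sketch(CLTensor &input0, CLTensor &input1, CLTensor &output)
{
    // Reject unsupported combinations up front; the kernel only supports F32.
    Status status = CLLocallyConnectedMatrixMultiplyKernel::validate(
        input0.info(), input1.info(), output.info());
    ARM_COMPUTE_ERROR_THROW_ON(status);

    CLLocallyConnectedMatrixMultiplyKernel kernel;
    kernel.configure(&input0, &input1, &output);
    CLScheduler::get().enqueue(kernel);
}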
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
deleted file mode 100644
index e0de3e7636..0000000000
--- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
-#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to compute magnitude and phase.
- *
- */
-class CLMagnitudePhaseKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLMagnitudePhaseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default;
- /** Initialise the kernel's input, output.
- *
- * @note At least one of magnitude or phase must be set.
- *
- * @param[in] gx The input gradient X tensor. Data types supported: S16.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16.
- * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
- /** Initialise the kernel's input, output.
- *
- * @note At least one of magnitude or phase must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] gx The input gradient X tensor. Data types supported: S16.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16.
- * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_gx; /**< Input gradient X. */
- const ICLTensor *_gy; /**< Input gradient Y. */
- ICLTensor *_magnitude; /**< Output - Magnitude. */
- ICLTensor *_phase; /**< Output - Phase. */
- bool _run_mag; /**< Calculate magnitude? */
- bool _run_phase; /**< Calculate phase? */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */
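
An illustrative sketch of driving this kernel on a 640x480 gradient pair (wrapper and shape are assumptions; passing nullptr for either output would skip that computation, per the note above):

#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void magnitude_phase_sketch()
{
    CLScheduler::get().default_init();

    const TensorShape shape(640U, 480U);
    CLTensor gx, gy, magnitude, phase;
    gx.allocator()->init(TensorInfo(shape, 1, DataType::S16));
    gy.allocator()->init(TensorInfo(shape, 1, DataType::S16));
    magnitude.allocator()->init(TensorInfo(shape, 1, DataType::S16));
    phase.allocator()->init(TensorInfo(shape, 1, DataType::U8));

    CLMagnitudePhaseKernel kernel;
    kernel.configure(&gx, &gy, &magnitude, &phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);

    gx.allocator()->allocate();
    gy.allocator()->allocate();
    magnitude.allocator()->allocate();
    phase.allocator()->allocate();

    CLScheduler::get().enqueue(kernel);
}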
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
deleted file mode 100644
index 96b4c4ea60..0000000000
--- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
-#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
-class CLMeanStdDevKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMeanStdDevKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default;
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] mean Output average pixel value.
- * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
- */
- void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] mean Output average pixel value.
- * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
- *
- * @param[in] input Input image info. Data types supported: U8.
- * @param[in] mean Input average pixel value.
- * @param[in] global_sum Keeps global sum of pixel values.
- * @param[in] stddev (Optional) Output standard deviation of pixel values.
- * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLImage *_input;
- float *_mean;
- float *_stddev;
- cl::Buffer *_global_sum;
- cl::Buffer *_global_sum_squared;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */
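
A sketch of the buffer plumbing this kernel expects, assuming a U8 2D image and an initialized CLScheduler. The single-cl_ulong accumulator buffers follow the sizes documented above; the wrapper and flags are illustrative:

#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void mean_stddev_sketch(CLTensor &image /* U8, 2D */)
{
    float mean   = 0.f;
    float stddev = 0.f;

    // One cl_ulong accumulator per reduction, as documented above.
    cl::Buffer global_sum(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR, sizeof(cl_ulong));
    cl::Buffer global_sum_squared(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR, sizeof(cl_ulong));

    CLMeanStdDevKernel kernel;
    kernel.configure(&image, &mean, &global_sum, &stddev, &global_sum_squared);

    CLScheduler::get().enqueue(kernel);
    CLScheduler::get().sync(); // mean/stddev hold the results once the queue has executed
}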
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
deleted file mode 100644
index ff0c96e168..0000000000
--- a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */
-class CLMeanStdDevNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMeanStdDevNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevNormalizationKernel(const CLMeanStdDevNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevNormalizationKernel &operator=(const CLMeanStdDevNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMeanStdDevNormalizationKernel(CLMeanStdDevNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMeanStdDevNormalizationKernel &operator=(CLMeanStdDevNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLMeanStdDevNormalizationKernel() = default;
- /** Initialise the kernel's input and outputs.
- *
- * @note If the output tensor is a nullptr, the normalization will be performed in-place.
- *
- * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- */
- void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
- /** Initialise the kernel's input and outputs.
- *
- * @note If the output tensor is a nullptr, the normalization will be performed in-place.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
- *
- * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H */
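
The in-place path documented above looks roughly like this (a minimal sketch; the 2D shape is illustrative and the default epsilon of 1e-8 is used):

#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void mean_stddev_norm_sketch()
{
    CLScheduler::get().default_init();

    CLTensor tensor;
    tensor.allocator()->init(TensorInfo(TensorShape(256U, 64U), 1, DataType::F32));

    CLMeanStdDevNormalizationKernel kernel;
    kernel.configure(&tensor); // output == nullptr -> normalize in place

    tensor.allocator()->allocate();
    CLScheduler::get().enqueue(kernel);
}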
diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
deleted file mode 100644
index c68ab07781..0000000000
--- a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
-#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the median 3x3 filter kernel.
- *
- */
-class CLMedian3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */
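
Because this kernel reads a border around each pixel, a defined border mode requires filling the input edge first. A hedged sketch pairing it with a CLFillBorderKernel, in the spirit of what the runtime median function does (wrapper and border mode are assumptions):

#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void median3x3_sketch(CLTensor &input, CLTensor &output /* both U8 */)
{
    CLMedian3x3Kernel median;
    median.configure(&input, &output, false /* border_undefined */);

    // Fill the input border before the filter reads it.
    CLFillBorderKernel fill_border;
    fill_border.configure(&input, median.border_size(), BorderMode::REPLICATE);

    CLScheduler::get().enqueue(fill_border);
    CLScheduler::get().enqueue(median);
}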
diff --git a/arm_compute/core/CL/kernels/CLMemsetKernel.h b/arm_compute/core/CL/kernels/CLMemsetKernel.h
deleted file mode 100644
index 430bc1d4f2..0000000000
--- a/arm_compute/core/CL/kernels/CLMemsetKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
-#define ARM_COMPUTE_CLMEMSETKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for filling the planes of a tensor */
-class CLMemsetKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMemsetKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMemsetKernel(const CLMemsetKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMemsetKernel &operator=(const CLMemsetKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMemsetKernel(CLMemsetKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMemsetKernel &operator=(CLMemsetKernel &&) = default;
- /** Default destructor */
- ~CLMemsetKernel() = default;
-
- /** Initialise the kernel's tensor and filling value
- *
- * @param[in,out] tensor Input tensor to fill. Supported data types: All.
- * @param[in] constant_value The value used to fill the planes of the tensor
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- */
- void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
- /** Initialise the kernel's tensor and filling value
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Input tensor to fill. Supported data types: All.
- * @param[in] constant_value The value used to fill the planes of the tensor
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel
- *
- * @param[in] tensor Source tensor info. Data types supported: All.
- * @param[in] constant_value The value used to fill the planes of the tensor
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_tensor;
- Window _full_window;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMEMSETKERNEL_H */
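
Filling a whole tensor with a constant is the simplest use of this kernel; a minimal sketch (shape and fill value are illustrative, and the default window of nullptr covers the full tensor):

#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void memset_sketch()
{
    CLScheduler::get().default_init();

    CLTensor tensor;
    tensor.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));

    CLMemsetKernel kernel;
    kernel.configure(&tensor, PixelValue(0.f)); // fill the whole tensor with zeros

    tensor.allocator()->allocate();
    CLScheduler::get().enqueue(kernel);
}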
diff --git a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h
deleted file mode 100644
index 5f9685f303..0000000000
--- a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform min max search on a 3D tensor.
- */
-class CLMinMaxLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: F32.
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- /** Resets global minimum and maximum
- *
- * @param[in,out] queue Command queue on which to map and unmap the min_max tensor
- */
- void reset(cl::CommandQueue &queue);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */
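
A sketch showing the [2, batches] output layout and the reset() call documented above, for a batch of four 32x32x3 inputs (shapes are illustrative):

#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void min_max_layer_sketch()
{
    CLScheduler::get().default_init();

    CLTensor input, min_max;
    input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U, 4U), 1, DataType::F32));
    min_max.allocator()->init(TensorInfo(TensorShape(2U, 4U), 1, DataType::F32)); // [2, batches]

    CLMinMaxLayerKernel kernel;
    kernel.configure(&input, &min_max);

    input.allocator()->allocate();
    min_max.allocator()->allocate();

    kernel.reset(CLScheduler::get().queue()); // reset global min/max before each run
    CLScheduler::get().enqueue(kernel);
}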
diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
deleted file mode 100644
index afb134fa59..0000000000
--- a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
-#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include <array>
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the kernel to perform min max search on an image.
- */
-class CLMinMaxKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMinMaxKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxKernel(const CLMinMaxKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxKernel(CLMinMaxKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const ICLImage *input, cl::Buffer *min_max);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Input image. */
- cl::Buffer *_min_max; /**< Minimum/maximum value. */
- std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */
-};
-
-/** Interface for the kernel to find min max locations of an image.
- */
-class CLMinMaxLocationKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLMinMaxLocationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default;
- /** Initialise the kernel's input and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- */
- void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
- ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
- /** Initialise the kernel's input and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
- ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input; /**< Input image. */
- cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */
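
The two kernels above are meant to be chained: CLMinMaxKernel fills the min_max buffer that CLMinMaxLocationKernel then consumes. A hedged sketch for a U8 image (the wrapper, the S32 buffer sizes matching the documentation above, and the location array capacity of 16 are assumptions):

#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void min_max_location_sketch(CLTensor &image /* U8, 2D */)
{
    cl::Context &ctx = CLScheduler::get().context();
    cl::Buffer min_max(ctx, CL_MEM_ALLOC_HOST_PTR, 2 * sizeof(cl_int));       // S32 for U8 input
    cl::Buffer min_max_count(ctx, CL_MEM_ALLOC_HOST_PTR, 2 * sizeof(cl_int)); // S32 occurrence counts
    CLCoordinates2DArray min_loc(16), max_loc(16); // reported locations capped at 16 each

    CLMinMaxKernel min_max_kernel;
    min_max_kernel.configure(&image, &min_max);

    CLMinMaxLocationKernel loc_kernel;
    loc_kernel.configure(&image, &min_max, &min_max_count, &min_loc, &max_loc);

    CLScheduler::get().enqueue(min_max_kernel);
    CLScheduler::get().enqueue(loc_kernel);
}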
diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
deleted file mode 100644
index 1f337356e9..0000000000
--- a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
-#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to apply a non-linear filter */
-class CLNonLinearFilterKernel : public ICLSimple2DKernel
-{
-public:
- /** Default constructor */
- CLNonLinearFilterKernel();
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data types supported: U8
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined);
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size; /**< Border size */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */
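
A sketch of a 5x5 cross-pattern median with an undefined border (wrapper and parameter choices are illustrative; the mask pointer is only read for MatrixPattern::OTHER, so nullptr is fine here):

#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void non_linear_filter_sketch(CLTensor &input, CLTensor &output /* both U8 */)
{
    CLNonLinearFilterKernel kernel;
    kernel.configure(&input, &output,
                     NonLinearFilterFunction::MEDIAN,
                     5 /* mask_size */, MatrixPattern::CROSS,
                     nullptr /* mask */, true /* border_undefined */);

    CLScheduler::get().enqueue(kernel);
}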
diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
deleted file mode 100644
index a256bc798d..0000000000
--- a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
-#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL
- *
- * @note Used by @ref CLFastCorners and @ref CLHarrisCorners
- */
-class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
- * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
- * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */
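Callers such as CLFastCorners pad the source according to the border the kernel reports. A hypothetical sketch under the same pre-removal API assumptions, with F32 input and output:

    #include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void nms_example()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::F32));

        CLNonMaximaSuppression3x3Kernel nms;
        nms.configure(&src, &dst, true /* border_undefined */);

        // A 3x3 window needs one pixel of border; with border_undefined == true
        // those output pixels are simply left unspecified.
        const BorderSize border = nms.border_size();
        ARM_COMPUTE_UNUSED(border);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        CLScheduler::get().enqueue(nms);
    }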
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
deleted file mode 100644
index 2511818ef2..0000000000
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the normalization layer kernel.
- */
-class CLNormalizationLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. The 3 lower dimensions represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
- * Data layouts supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- */
- void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. The 3 lower dimensions represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
- * Data layouts supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel
- *
- * @param[in] input Source tensor. The 3 lower dimensions represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
- * Data layouts supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- BorderSize _border_size;
- bool _is_norm_across_width;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H */
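The validate()/configure() pair above follows the library's usual two-step pattern: the static check runs on ITensorInfo objects alone, so it can happen before any memory is bound. A minimal sketch, with the same pre-removal API assumptions as above:

    #include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void normalization_example()
    {
        CLScheduler::get().default_init();

        CLTensor in, out;
        in.allocator()->init(TensorInfo(TensorShape(28U, 28U, 32U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(28U, 28U, 32U), 1, DataType::F32));

        // Cross-map normalization over 5 neighbouring feature maps
        const NormalizationLayerInfo norm_info(NormType::CROSS_MAP, 5U);

        // validate() only needs tensor infos, so it can run before allocation
        ARM_COMPUTE_ERROR_THROW_ON(CLNormalizationLayerKernel::validate(in.info(), out.info(), norm_info));

        CLNormalizationLayerKernel norm;
        norm.configure(&in, &out, norm_info);

        in.allocator()->allocate();
        out.allocator()->allocate();
        CLScheduler::get().enqueue(norm);
    }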
diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
deleted file mode 100644
index d247e1fddc..0000000000
--- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
-#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the NormalizePlanarYUV layer kernel. */
-class CLNormalizePlanarYUVLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLNormalizePlanarYUVLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizePlanarYUVLayerKernel(const CLNormalizePlanarYUVLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizePlanarYUVLayerKernel &operator=(const CLNormalizePlanarYUVLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLNormalizePlanarYUVLayerKernel(CLNormalizePlanarYUVLayerKernel &&) = default;
- /** Default move assignment operator */
- CLNormalizePlanarYUVLayerKernel &operator=(CLNormalizePlanarYUVLayerKernel &&) = default;
- /** Default destructor */
- ~CLNormalizePlanarYUVLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
- * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
- * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
- /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel
- *
- * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor info. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
- * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_mean;
- const ICLTensor *_std;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H */
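As documented above, mean and std are plain 1D tensors whose length matches the channel dimension of the input. A sketch with the same caveats; the values themselves would be filled in via map()/unmap() after allocation:

    #include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void normalize_planar_yuv_example()
    {
        CLScheduler::get().default_init();

        CLTensor in, out, mean, std_dev;
        in.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));
        // One mean and one standard deviation per input channel
        mean.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::F32));
        std_dev.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::F32));

        CLNormalizePlanarYUVLayerKernel kernel;
        kernel.configure(&in, &out, &mean, &std_dev);

        in.allocator()->allocate();
        out.allocator()->allocate();
        mean.allocator()->allocate();    // map() and fill before enqueueing
        std_dev.allocator()->allocate(); // map() and fill before enqueueing
        CLScheduler::get().enqueue(kernel);
    }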
diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h
deleted file mode 100644
index 166c202335..0000000000
--- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H
-#define ARM_COMPUTE_CLPADLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the PadLayer kernel. */
-class CLPadLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPadLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel(const CLPadLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel(CLPadLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default;
- /** Default destructor */
- ~CLPadLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @param[in] input Source tensor. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding.
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
- * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
- /** Set the input and output tensor.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding.
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
- * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
- PaddingMode mode = PaddingMode::CONSTANT);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
- * @param[in] output Output tensor info. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding.
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
- * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- int _input_start_x;
- int _input_start_y;
- bool _4d_enabled;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPADLAYERKERNEL_H */
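PaddingList is a vector of (before, after) pairs, one per dimension, and the destination shape must already account for them. A sketch under the same assumptions; REFLECT mode is used here, in which case the constant value argument is ignored:

    #include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
    #include "arm_compute/core/PixelValue.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void pad_example()
    {
        CLScheduler::get().default_init();

        CLTensor in, out;
        in.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
        // 1 element before/after on X, 2 before/after on Y -> 10x12 output
        const PaddingList padding = { { 1, 1 }, { 2, 2 } };
        out.allocator()->init(TensorInfo(TensorShape(10U, 12U), 1, DataType::F32));

        CLPadLayerKernel pad;
        pad.configure(&in, &out, padding, PixelValue(), PaddingMode::REFLECT);

        in.allocator()->allocate();
        out.allocator()->allocate();
        CLScheduler::get().enqueue(pad);
    }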
diff --git a/arm_compute/core/CL/kernels/CLPermuteKernel.h b/arm_compute/core/CL/kernels/CLPermuteKernel.h
deleted file mode 100644
index 1a9240ef6b..0000000000
--- a/arm_compute/core/CL/kernels/CLPermuteKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H
-#define ARM_COMPUTE_CLPERMUTEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform tensor permutation.
- *
- * Permutes a tensor according to a given permutation vector.
- */
-class CLPermuteKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPermuteKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteKernel(const CLPermuteKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteKernel &operator=(const CLPermuteKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPermuteKernel(CLPermuteKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPermuteKernel &operator=(CLPermuteKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input The input tensor to permute. Data types supported: All.
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to permute. Data types supported: All.
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input First tensor input info. Data types supported: All.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- PermutationVector _perm;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */
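The permutation vector maps destination dimensions to source dimensions: output dimension i takes its extent from input dimension perm[i]. A sketch of the common NCHW-to-NHWC shuffle, same assumptions as above:

    #include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void permute_example()
    {
        CLScheduler::get().default_init();

        CLTensor in, out;
        // Input laid out as [W, H, C]; the permutation moves C to the front
        in.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(8U, 16U, 16U), 1, DataType::F32));

        const PermutationVector perm(2U, 0U, 1U); // the classic NCHW -> NHWC shuffle

        ARM_COMPUTE_ERROR_THROW_ON(CLPermuteKernel::validate(in.info(), out.info(), perm));

        CLPermuteKernel permute;
        permute.configure(&in, &out, perm);

        in.allocator()->allocate();
        out.allocator()->allocate();
        CLScheduler::get().enqueue(permute);
    }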
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
deleted file mode 100644
index 52a09d9a49..0000000000
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
-#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the pixelwise multiplication kernel. */
-class CLPixelWiseMultiplicationKernel : public ICLKernel
-{
-public:
- /** Default constructor.*/
- CLPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1.
- * @param[out] output The output tensor. Data types supported:
- * - U8, only if both inputs are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1.
- * @param[out] output The output tensor. Data types supported: same as @p input1. Note: U8 output requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported:
- * - U8, only if both inputs are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-
-/** Interface for the complex pixelwise multiplication kernel. */
-class CLComplexPixelWiseMultiplicationKernel : public ICLKernel
-{
-public:
- /** Default constructor.*/
- CLComplexPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel
- *
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H */
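The scale constraint documented above (1/255 or 1/2^n with 0 <= n <= 15) is worth showing concretely; 1/255 is the natural choice for keeping a U8*U8 product in range. A sketch, with the rounding and overflow policies taken from the lists in the comments above:

    #include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void pixelwise_mul_example()
    {
        CLScheduler::get().default_init();

        CLTensor a, b, out;
        a.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        b.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        // U8 output requires both inputs to be U8
        out.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

        CLPixelWiseMultiplicationKernel mul;
        mul.configure(&a, &b, &out, 1.f / 255.f,
                      ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        CLScheduler::get().enqueue(mul);
    }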
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
deleted file mode 100644
index 395750440c..0000000000
--- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/Error.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the pooling layer kernel */
-class CLPoolingLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~CLPoolingLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
- ICLTensor *_indices;
- PoolingLayerInfo _pool_info;
- DataLayout _data_layout;
- BorderSize _border_size;
- unsigned int _num_elems_processed_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H */
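The optional indices output records the position of each maximum, which max-unpooling consumers need. A sketch, assuming the 20.x PoolingLayerInfo that carries the data layout, and noting that at the time index extraction was roughly limited to 2x2 MAX pooling:

    #include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void pooling_example()
    {
        CLScheduler::get().default_init();

        // NHWC: dimension 0 is C, 1 is W, 2 is H
        TensorInfo in_info(TensorShape(16U, 8U, 8U), 1, DataType::F32);
        in_info.set_data_layout(DataLayout::NHWC);
        TensorInfo out_info(TensorShape(16U, 4U, 4U), 1, DataType::F32);
        out_info.set_data_layout(DataLayout::NHWC);

        CLTensor in, out, indices;
        in.allocator()->init(in_info);
        out.allocator()->init(out_info);
        indices.allocator()->init(TensorInfo(TensorShape(16U, 4U, 4U), 1, DataType::U32));

        // 2x2 max pooling, stride 2, no padding
        const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo(2, 2, 0, 0));

        CLPoolingLayerKernel pool;
        pool.configure(&in, &out, pool_info, &indices);

        in.allocator()->allocate();
        out.allocator()->allocate();
        indices.allocator()->allocate();
        CLScheduler::get().enqueue(pool);
    }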
diff --git a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h b/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h
deleted file mode 100644
index 5fd27d9233..0000000000
--- a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
-#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the PriorBox layer kernel. */
-class CLPriorBoxLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLPriorBoxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPriorBoxLayerKernel(const CLPriorBoxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPriorBoxLayerKernel &operator=(const CLPriorBoxLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLPriorBoxLayerKernel(CLPriorBoxLayerKernel &&) = default;
- /** Default move assignment operator */
- CLPriorBoxLayerKernel &operator=(CLPriorBoxLayerKernel &&) = default;
- /** Default destructor */
- ~CLPriorBoxLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
- * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
- * @param[in] info Prior box layer info.
- * @param[in] min Minimum prior box values
- * @param[in] max Maximum prior box values
- * @param[in] aspect_ratios Aspect ratio values
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
- * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
- * @param[in] info Prior box layer info.
- * @param[in] min Minimum prior box values
- * @param[in] max Maximum prior box values
- * @param[in] aspect_ratios Aspect ratio values
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max,
- cl::Buffer *aspect_ratios);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel
- *
- * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1
- * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
- * @param[in] info Prior box layer info.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- PriorBoxLayerInfo _info;
- int _num_priors;
- cl::Buffer *_min;
- cl::Buffer *_max;
- cl::Buffer *_aspect_ratios;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H */
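Unusually, this kernel takes raw cl::Buffer handles for the minimum sizes, maximum sizes and aspect ratios; the CLPriorBoxLayer runtime function sized those buffers from the PriorBoxLayerInfo before calling configure(). A hypothetical fragment to the same effect, assuming the info object and tensors are already valid and that PriorBoxLayerInfo exposes min_sizes()/max_sizes()/aspect_ratios() accessors as in the pre-removal API:

    #include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"

    using namespace arm_compute;

    void prior_box_configure(const ICLTensor *in1, const ICLTensor *in2, ICLTensor *out,
                             const PriorBoxLayerInfo &info)
    {
        cl::Context &ctx = CLScheduler::get().context();

        // Host-visible staging buffers, one float per entry
        cl::Buffer min(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
                       info.min_sizes().size() * sizeof(float));
        cl::Buffer max(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
                       info.max_sizes().size() * sizeof(float));
        cl::Buffer aspect_ratios(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
                                 info.aspect_ratios().size() * sizeof(float));

        CLPriorBoxLayerKernel kernel;
        kernel.configure(in1, in2, out, info, &min, &max, &aspect_ratios);

        CLScheduler::get().enqueue(kernel);
        CLScheduler::get().sync(); // the buffers above are locals, so finish before returning
    }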
diff --git a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
deleted file mode 100644
index 2d4707245f..0000000000
--- a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLQLSTMLAYERNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_CLQLSTMLAYERNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform layer normalization. */
-class CLQLSTMLayerNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLQLSTMLayerNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQLSTMLayerNormalizationKernel(const CLQLSTMLayerNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQLSTMLayerNormalizationKernel &operator=(const CLQLSTMLayerNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLQLSTMLayerNormalizationKernel(CLQLSTMLayerNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLQLSTMLayerNormalizationKernel &operator=(CLQLSTMLayerNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLQLSTMLayerNormalizationKernel() = default;
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] weight Weight tensor. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor. Data types supported: S32.
- *
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias);
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] weight Weight tensor. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor. Data types supported: S32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias);
- /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel
- *
- * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] weight Weight tensor info. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor info. Data types supported: S32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_weight;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLQLSTMLAYERNORMALIZATIONKERNEL_H */
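The QSYMM16/S32 type split above is the interesting part: input, output and weight share the symmetric 16-bit quantized type while the bias is 32-bit integer. A sketch, with an illustrative 1/32768 scale:

    #include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void qlstm_layer_norm_example()
    {
        CLScheduler::get().default_init();

        const QuantizationInfo qinfo(1.f / 32768.f);

        // 2D input: [num_units, batch]
        CLTensor in, out, weight, bias;
        in.allocator()->init(TensorInfo(TensorShape(32U, 2U), 1, DataType::QSYMM16, qinfo));
        out.allocator()->init(TensorInfo(TensorShape(32U, 2U), 1, DataType::QSYMM16, qinfo));
        weight.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::QSYMM16, qinfo));
        bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::S32));

        ARM_COMPUTE_ERROR_THROW_ON(CLQLSTMLayerNormalizationKernel::validate(
            in.info(), out.info(), weight.info(), bias.info()));

        CLQLSTMLayerNormalizationKernel norm;
        norm.configure(&in, &out, &weight, &bias);

        in.allocator()->allocate();
        out.allocator()->allocate();
        weight.allocator()->allocate();
        bias.allocator()->allocate();
        CLScheduler::get().enqueue(norm);
    }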
diff --git a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
deleted file mode 100644
index de30447e17..0000000000
--- a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the quantization layer kernel.
- *
- * @note The implementation supports only 3D input tensors.
- */
-class CLQuantizationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLQuantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLQuantizationLayerKernel() = default;
- /** Set the input, output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */
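Because output auto initialization is not supported, the destination tensor must be fully described, including its QuantizationInfo, before configure() is called. A sketch with illustrative quantization parameters:

    #include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void quantization_example()
    {
        CLScheduler::get().default_init();

        CLTensor in, out;
        in.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        // The output must be fully initialised up front: shape, data type and
        // quantization parameters (scale = 0.5, zero point = 10 here).
        out.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8,
                                         QuantizationInfo(0.5f, 10)));

        CLQuantizationLayerKernel quant;
        quant.configure(&in, &out);

        in.allocator()->allocate();
        out.allocator()->allocate();
        CLScheduler::get().enqueue(quant);
    }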
diff --git a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h b/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h
deleted file mode 100644
index 30bdbb1844..0000000000
--- a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H
-#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the RoIAlign kernel.
- */
-class CLROIAlignLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLROIAlignLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIAlignLayerKernel(const CLROIAlignLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIAlignLayerKernel &operator=(const CLROIAlignLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLROIAlignLayerKernel(CLROIAlignLayerKernel &&) = default;
- /** Default move assignment operator. */
- CLROIAlignLayerKernel &operator=(CLROIAlignLayerKernel &&) = default;
- /** Default destructor */
- ~CLROIAlignLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor: a 2D tensor of size [5, N] (where N is the number of ROIs), in which each ROI holds a batch id followed by
- * the top-left and bottom-right corner coordinates, laid out as [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor: a 2D tensor of size [5, N] (where N is the number of ROIs), in which each ROI holds a batch id followed by
- * the top-left and bottom-right corner coordinates, laid out as [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED,
- * otherwise same as @p input
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_rois;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H*/
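For reference, a minimal pre-flight sketch against the validate() declared above, mirroring the shape contract in the notes (rois is [5, N]; output is [pooled_w, pooled_h, input_depth, num_rois]). The shapes, the 7x7 pooled size and the 1/16 spatial scale are illustrative assumptions, not values from this patch:

    #include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_roi_align()
    {
        // F32 input, so rois is also F32 per the doc above; 16 ROIs -> rois shape [5, 16].
        TensorInfo input(TensorShape(64U, 64U, 256U), 1, DataType::F32);
        TensorInfo rois(TensorShape(5U, 16U), 1, DataType::F32);
        // Destination: [pooled_w, pooled_h, input_depth, num_rois].
        TensorInfo output(TensorShape(7U, 7U, 256U, 16U), 1, DataType::F32);
        // ROIPoolingLayerInfo(pooled_w, pooled_h, spatial_scale) is an assumption here.
        return CLROIAlignLayerKernel::validate(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 0.0625f));
    }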
diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
deleted file mode 100644
index ea70a58188..0000000000
--- a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/CL/ICLArray.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the ROI pooling layer kernel */
-class CLROIPoolingLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLROIPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~CLROIPoolingLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[in] rois ROIs tensor, a 2D tensor of size [5, N] (where N is the number of ROIs). Each ROI is stored as
- * [ batch_id, x1, y1, x2, y2 ], with the top-left and bottom-right corners given as image coordinates. Data types supported: U16
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[in] rois ROIs tensor, a 2D tensor of size [5, N] (where N is the number of ROIs). Each ROI is stored as
- * [ batch_id, x1, y1, x2, y2 ], with the top-left and bottom-right corners given as image coordinates. Data types supported: U16
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_rois;
- ICLTensor *_output;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H */
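No validate() is declared in this header, so the contract can only be exercised through configure(). A minimal sketch, assuming a CL context has been initialised through the runtime scheduler and that ROIPoolingLayerInfo takes (pooled_w, pooled_h, spatial_scale); all shapes are illustrative:

    #include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void configure_roi_pooling()
    {
        CLScheduler::get().default_init(); // create a CL context/queue before building kernels
        CLTensor input, rois, output;
        input.allocator()->init(TensorInfo(TensorShape(64U, 64U, 256U), 1, DataType::F32));
        rois.allocator()->init(TensorInfo(TensorShape(5U, 16U), 1, DataType::U16)); // U16 per the doc above
        output.allocator()->init(TensorInfo(TensorShape(7U, 7U, 256U, 16U), 1, DataType::F32));

        CLROIPoolingLayerKernel kernel;
        kernel.configure(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 0.0625f));
    }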
diff --git a/arm_compute/core/CL/kernels/CLRangeKernel.h b/arm_compute/core/CL/kernels/CLRangeKernel.h
deleted file mode 100644
index fc8db98bf9..0000000000
--- a/arm_compute/core/CL/kernels/CLRangeKernel.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLRANGEKERNEL_H
-#define ARM_COMPUTE_CLRANGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Kernel class for Range
- *
- * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments
- * of 'step' up to but not including 'end'.
- */
-class CLRangeKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLRangeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRangeKernel(const CLRangeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRangeKernel &operator=(const CLRangeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLRangeKernel(CLRangeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLRangeKernel &operator=(CLRangeKernel &&) = default;
- /** Default destructor */
- ~CLRangeKernel() = default;
- /** Initialize the kernel's output tensor, start, end and step of the sequence.
- *
- * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending value of the sequence (exclusive).
- * @param[in] step The gap between each pair of values in the sequence.
- */
- void configure(ICLTensor *output, float start, float end, float step);
- /** Initialize the kernel's output tensor, start, end and step of the sequence.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending value of the sequence (exclusive).
- * @param[in] step The gap between each pair of values in the sequence.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step);
- /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel
- *
- * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending value of the sequence (exclusive).
- * @param[in] step The gap between each pair of values in the sequence.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *output, float start, float end, float step);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- float _start; /**< Start of sequence */
- float _end; /**< End of sequence */
- float _step; /**< Increment/step value */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLRANGEKERNEL_H */
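Because the interval is half-open, the output length follows as ceil((end - start) / step); for start 0, end 10, step 2 that is 5 elements. A sketch against the validate() declared above, with hypothetical values:

    #include "arm_compute/core/CL/kernels/CLRangeKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_range()
    {
        // ceil((10 - 0) / 2) = 5 -> a 1-D output of 5 elements.
        TensorInfo output(TensorShape(5U), 1, DataType::F32);
        return CLRangeKernel::validate(&output, 0.0f, 10.0f, 2.0f);
    }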
diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
deleted file mode 100644
index 0b0b4ae9b0..0000000000
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
-#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reduction operation kernel
- */
-class CLReductionOperationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReductionOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReductionOperationKernel(const CLReductionOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReductionOperationKernel &operator=(const CLReductionOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReductionOperationKernel(CLReductionOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReductionOperationKernel &operator=(CLReductionOperationKernel &&) = default;
- /** Default destructor */
- ~CLReductionOperationKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axes: 0, 1, 2, 3
- * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
- * @param[in] width (Optional) Width of the input image. Required only when reducing along the x-axis.
- */
- void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axes: 0, 1, 2, 3
- * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
- * @param[in] width (Optional) Width of the input image. Required only when reducing along the x-axis.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axes: 0, 1, 2, 3
- * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
- * @param[in] width (Optional) Width of the input image. Required only when reducing along the x-axis.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _reduction_axis;
- ReductionOperation _op;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H */
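Since the rank is preserved, the reduced dimension collapses to 1. A sketch of an x-axis reduction against the validate() declared above (shapes are illustrative; the width argument is the x-extent of the input, per the parameter doc):

    #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_reduction()
    {
        TensorInfo input(TensorShape(128U, 32U), 1, DataType::F32);
        TensorInfo output(TensorShape(1U, 32U), 1, DataType::F32); // axis 0 collapses to 1
        return CLReductionOperationKernel::validate(&input, &output, 0U, ReductionOperation::SUM, 128U);
    }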
diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h
deleted file mode 100644
index f3d1511905..0000000000
--- a/arm_compute/core/CL/kernels/CLRemapKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREMAPKERNEL_H
-#define ARM_COMPUTE_CLREMAPKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a remap on a tensor */
-class CLRemapKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLRemapKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRemapKernel(const CLRemapKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRemapKernel &operator=(const CLRemapKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLRemapKernel(CLRemapKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLRemapKernel &operator=(CLRemapKernel &&) = default;
- /** Initialize the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] map_x Map for X coordinates. Data types supported: F32.
- * @param[in] map_y Map for Y coordinates. Data types supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
- /** Initialize the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] map_x Map for X coordinates. Data types supported: F32.
- * @param[in] map_y Map for Y coordinates. Data types supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_map_x;
- const ICLTensor *_map_y;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREMAPKERNEL_H */
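Each destination pixel (x, y) samples the input at (map_x(x, y), map_y(x, y)), so both maps carry one F32 coordinate per output pixel. A configure() sketch under the same assumptions as the earlier ones (initialised CL context, illustrative shapes):

    #include "arm_compute/core/CL/kernels/CLRemapKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void configure_remap()
    {
        CLScheduler::get().default_init();
        CLTensor input, map_x, map_y, output;
        input.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::U8));
        map_x.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::F32));
        map_y.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::U8));

        CLRemapKernel kernel;
        kernel.configure(&input, &map_x, &map_y, &output, InterpolationPolicy::NEAREST_NEIGHBOR, false);
    }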
diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
deleted file mode 100644
index 9c064858af..0000000000
--- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H
-#define ARM_COMPUTE_CLREORGLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a reorg layer */
-class CLReorgLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReorgLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLReorgLayerKernel(const CLReorgLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLReorgLayerKernel &operator=(const CLReorgLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReorgLayerKernel(CLReorgLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
- * @param[out] output Destination tensor with tensor shape:
- * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size], so the output
- * contains the same number of elements as the input. Data types supported: same as @p input.
- * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t stride);
- /** Initialize the kernel's input, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
- * @param[out] output Destination tensor with tensor shape:
- * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size], so the output
- * contains the same number of elements as the input. Data types supported: same as @p input.
- * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride);
- /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[in] output Destination tensor with tensor shape:
- * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size], so the output
- * contains the same number of elements as the input. Data types supported: same as @p input.
- * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREORGLAYERKERNEL_H */
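The shape rule above is element-count preserving: with stride 2, a [16, 16, 4] input maps to [8, 8, 16]. A sketch against the validate() declared above (shapes are illustrative):

    #include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_reorg()
    {
        TensorInfo input(TensorShape(16U, 16U, 4U), 1, DataType::F32);
        TensorInfo output(TensorShape(8U, 8U, 16U), 1, DataType::F32); // 16*16*4 == 8*8*16
        return CLReorgLayerKernel::validate(&input, &output, 2);
    }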
diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
deleted file mode 100644
index 3ea74114d0..0000000000
--- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
-#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform tensor reshaping */
-class CLReshapeLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReshapeLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReshapeLayerKernel(const CLReshapeLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReshapeLayerKernel &operator=(const CLReshapeLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReshapeLayerKernel(CLReshapeLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReshapeLayerKernel &operator=(CLReshapeLayerKernel &&) = default;
- /** Default destructor */
- ~CLReshapeLayerKernel() = default;
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: All.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */
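Reshape only requires the element counts of the two shapes to match. A minimal sketch against the validate() declared above, with hypothetical shapes:

    #include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_reshape()
    {
        TensorInfo input(TensorShape(8U, 4U), 1, DataType::F32);
        TensorInfo output(TensorShape(32U), 1, DataType::F32); // 8*4 == 32 elements
        return CLReshapeLayerKernel::validate(&input, &output);
    }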
diff --git a/arm_compute/core/CL/kernels/CLReverseKernel.h b/arm_compute/core/CL/kernels/CLReverseKernel.h
deleted file mode 100644
index e8f4507969..0000000000
--- a/arm_compute/core/CL/kernels/CLReverseKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H
-#define ARM_COMPUTE_CLREVERSEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reverse kernel */
-class CLReverseKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReverseKernel(const CLReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReverseKernel &operator=(const CLReverseKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReverseKernel(CLReverseKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReverseKernel &operator=(CLReverseKernel &&) = default;
- /** Default destructor */
- ~CLReverseKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_axis;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREVERSEKERNEL_H */
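The axis tensor is a 1-D U32 list of dimension indices to flip, while input and output shapes stay identical. A sketch against the validate() declared above (shapes and axis choice are illustrative):

    #include "arm_compute/core/CL/kernels/CLReverseKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_reverse()
    {
        TensorInfo input(TensorShape(8U, 4U, 2U), 1, DataType::F32);
        TensorInfo output(input); // same shape and type as the input
        TensorInfo axis(TensorShape(2U), 1, DataType::U32); // e.g. flip two of the dimensions
        return CLReverseKernel::validate(&input, &output, &axis);
    }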
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h
deleted file mode 100644
index 328578d88c..0000000000
--- a/arm_compute/core/CL/kernels/CLScaleKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSCALEKERNEL_H
-#define ARM_COMPUTE_CLSCALEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the scale kernel */
-class CLScaleKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[out] output Destination tensor. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy Interpolation type to use
- * @param[in] border_mode Selected border mode.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- */
- void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false);
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[out] output Destination tensor. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy Interpolation type to use
- * @param[in] border_mode Selected border mode.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode,
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel
- *
- * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] output Destination tensor info. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy Interpolation type to use
- * @param[in] border_mode Selected border mode.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER,
- bool align_corners = false);
- /** Input tensor accessor.
- *
- * @return Pointer to input tensor.
- */
- const ICLTensor *input() const;
- /** Output tensor accessor.
- *
- * @return Pointer to output tensor.
- */
- const ICLTensor *output() const;
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- InterpolationPolicy _interpolationPolicy = InterpolationPolicy::BILINEAR;
- DataLayout _data_layout = DataLayout::UNKNOWN;
- bool _align_corners = false;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSCALEKERNEL_H */
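Only the XY extents may differ between input and output. A 2x bilinear upscale sketched against the validate() declared above (shapes, policy and border mode are illustrative):

    #include "arm_compute/core/CL/kernels/CLScaleKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_scale()
    {
        TensorInfo input(TensorShape(32U, 32U, 3U), 1, DataType::F32);
        TensorInfo output(TensorShape(64U, 64U, 3U), 1, DataType::F32); // only x/y change
        return CLScaleKernel::validate(&input, &output, InterpolationPolicy::BILINEAR, BorderMode::REPLICATE);
    }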
diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
deleted file mode 100644
index 209a150a67..0000000000
--- a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H
-#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
- *
- * @f[
- * \mathbf{G}_x=\begin{vmatrix}
- * -3 & 0 & +3\\
- * -10& 0 & +10\\
- * -3 & 0 & +3
- * \end{vmatrix}
- * @f]
- * @f[
- * \mathbf{G}_y=\begin{vmatrix}
- * -3 & -10 & -3\\
- * 0 & 0 & 0\\
- * +3 & +10 & +3
- * \end{vmatrix}
- * @f]
- */
-class CLScharr3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLScharr3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default;
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- bool _run_scharr_x; /**< Do we need to run Scharr X ? */
- bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
- const ICLTensor *_input; /**< Input image */
- ICLTensor *_output_x; /**< Output image for scharr X */
- ICLTensor *_output_y; /**< Output image for scharr Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */
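Either gradient output may be skipped by passing nullptr, as long as at least one is set. A configure() sketch producing only the X gradient, with U8 input and S16 output as documented (CL context initialisation and shapes as assumed in the earlier sketches):

    #include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void configure_scharr_x_only()
    {
        CLScheduler::get().default_init();
        CLTensor input, grad_x;
        input.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::U8));
        grad_x.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::S16));

        CLScharr3x3Kernel kernel;
        kernel.configure(&input, &grad_x, nullptr /* no Y gradient */, false /* border is replicate/constant */);
    }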
diff --git a/arm_compute/core/CL/kernels/CLSelectKernel.h b/arm_compute/core/CL/kernels/CLSelectKernel.h
deleted file mode 100644
index 5cbd985cda..0000000000
--- a/arm_compute/core/CL/kernels/CLSelectKernel.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSELECTKERNEL_H
-#define ARM_COMPUTE_CLSELECTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** OpenCL interface for executing the select kernel
- *
- * Select is computed by:
- * @f[ output(i) = condition(i) ? x(i) : y(i) @f]
- **/
-class CLSelectKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSelectKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSelectKernel(const CLSelectKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSelectKernel &operator=(const CLSelectKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSelectKernel(CLSelectKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSelectKernel &operator=(CLSelectKernel &&) = default;
- /** Default destructor */
- ~CLSelectKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[out] output Output tensor. Data types supported: Same as @p x.
- */
- void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[out] output Output tensor. Data types supported: Same as @p x.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_c; /**< Condition tensor */
- const ICLTensor *_x; /**< Source tensor 1 */
- const ICLTensor *_y; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
- bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSELECTKERNEL_H */
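With the parameter directions corrected above, the validate() contract is straightforward: c is U8, and x, y and output share a shape and type. A minimal sketch with hypothetical shapes:

    #include "arm_compute/core/CL/kernels/CLSelectKernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status check_select()
    {
        TensorInfo c(TensorShape(16U, 8U), 1, DataType::U8);
        TensorInfo x(TensorShape(16U, 8U), 1, DataType::F32);
        TensorInfo y(x);      // same shape and type as x
        TensorInfo output(x); // likewise
        return CLSelectKernel::validate(&c, &x, &y, &output);
    }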
diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
deleted file mode 100644
index 4240fe80b3..0000000000
--- a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H
-#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */
-class CLSobel3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default;
- /** Default destructor */
- ~CLSobel3x3Kernel() = default;
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< Output tensor for Sobel X */
- ICLTensor *_output_y; /**< Output tensor for Sobel Y */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */
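Unlike the separable 5x5/7x7 variants below, the 3x3 filter runs in a single pass. A configure() sketch computing both gradients, under the same assumptions as the earlier configure sketches:

    #include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" // as it existed before this patch
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void configure_sobel3x3()
    {
        CLScheduler::get().default_init();
        CLTensor input, grad_x, grad_y;
        input.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::U8));
        grad_x.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::S16));
        grad_y.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::S16));

        CLSobel3x3Kernel kernel;
        kernel.configure(&input, &grad_x, &grad_y, false);
    }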
diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
deleted file mode 100644
index ef30f0ec93..0000000000
--- a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H
-#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */
-class CLSobel5x5HorKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel5x5HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default;
- /** Default destructor */
- ~CLSobel5x5HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< X output of horizontal pass */
- ICLTensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */
-class CLSobel5x5VertKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel5x5VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default;
- /** Default destructor */
- ~CLSobel5x5VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set, together with the corresponding input.
- *
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set, together with the corresponding input.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
- const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
- ICLTensor *_output_x; /**< X output of Sobel */
- ICLTensor *_output_y; /**< Y output of Sobel */
- bool _run_sobel_x; /**< Do we need to run Sobel X? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */
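
The 5x5 filter is split into two kernels because it is separable: a horizontal derivative pass followed by a vertical smoothing pass. Below is a scalar sketch of that structure for the X gradient, interior pixels only; the tap values are the commonly quoted 5x5 Sobel coefficients, assumed here rather than taken from the kernel source.

    // Two-pass separable Sobel X on a W x H image, borders ignored (interior only).
    #include <cstdint>
    #include <vector>

    static const int deriv[5]  = {-1, -2, 0, 2, 1}; // horizontal derivative taps (assumed)
    static const int smooth[5] = {1, 4, 6, 4, 1};   // vertical smoothing taps (assumed)

    void sobel5x5_x(const std::vector<uint8_t> &in, std::vector<int16_t> &tmp,
                    std::vector<int16_t> &out, int w, int h)
    {
        // Horizontal pass: U8 -> S16 (matches CLSobel5x5HorKernel's output type).
        for (int y = 0; y < h; ++y)
            for (int x = 2; x < w - 2; ++x)
            {
                int acc = 0;
                for (int k = -2; k <= 2; ++k)
                    acc += deriv[k + 2] * in[y * w + x + k];
                tmp[y * w + x] = static_cast<int16_t>(acc);
            }
        // Vertical pass: S16 -> S16 (CLSobel5x5VertKernel consumes the horizontal result).
        for (int y = 2; y < h - 2; ++y)
            for (int x = 2; x < w - 2; ++x)
            {
                int acc = 0;
                for (int k = -2; k <= 2; ++k)
                    acc += smooth[k + 2] * tmp[(y + k) * w + x];
                out[y * w + x] = static_cast<int16_t>(acc);
            }
    }
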
diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
deleted file mode 100644
index 4eda5a40d4..0000000000
--- a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H
-#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */
-class CLSobel7x7HorKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel7x7HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default;
- /** Default destructor */
- ~CLSobel7x7HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< X output of horizontal pass */
- ICLTensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */
-class CLSobel7x7VertKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel7x7VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default;
- /** Default destructor */
- ~CLSobel7x7VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set, along with the corresponding input.
- *
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set, along with the corresponding input.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
- const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
- ICLTensor *_output_x; /**< X output of Sobel */
- ICLTensor *_output_y; /**< Y output of Sobel */
- bool _run_sobel_x; /**< Do we need to run Sobel X? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */
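
The 7x7 variant follows the same two-pass pattern but keeps S32 data, since the accumulated magnitudes outgrow S16. A configuration sketch, assuming the CL runtime and the tensors below (src: U8; tmp_x/tmp_y, dst_x/dst_y: S32) are created and allocated elsewhere, e.g. by a function-level Sobel:

    // Chain the two passes: horizontal first, then vertical on its results.
    CLSobel7x7HorKernel  hor;
    CLSobel7x7VertKernel vert;
    hor.configure(&src, &tmp_x, &tmp_y, border_undefined);
    vert.configure(&tmp_x, &tmp_y, &dst_x, &dst_y, border_undefined);
    // Enqueue hor before vert, each over its maximum window.
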
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
deleted file mode 100644
index b174f493b5..0000000000
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
-#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
-#include "arm_compute/core/KernelDescriptors.h"
-
-#include <tuple>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for identifying the max value of 1D Logits */
-class CLLogits1DMaxKernel : public ICLSimple3DKernel
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] output Destination tensor. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Interface for shifting, exponentiating and summing the logits */
-class CLLogits1DShiftExpSumKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLogits1DShiftExpSumKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0
- */
- void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[in] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[in] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_max;
- ICLTensor *_output;
- ICLTensor *_sum;
-};
-
-/** Interface for computing the max, then shifting, exponentiating and summing the logits */
-class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
-{
-public:
- /** Info for whether a parallel reduction will be run and the vector size of the execution. */
- using ParallelReductionInfo = std::tuple<bool, unsigned int>;
-
-public:
- /** Default constructor */
- CLLogits1DMaxShiftExpSumKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32
- * @param[in,out] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F16/F32
- * @param[in,out] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
- *
- * @param[in] input Source tensor. Data types supported: F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[in] output Destination tensor. Data types supported: same as @p input
- * @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
- /** Checks if the given size is eligible for parallel reduction
- *
- * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size).
- * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4.
- *
- * @param[in] size Size to check
- *
- * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run,
- * while the second element is the vector size of the execution.
- */
- static ParallelReductionInfo is_parallel_reduction(size_t size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_max;
- ICLTensor *_output;
- ICLTensor *_sum;
-
-private:
- static const unsigned int _grid_size;
- static const unsigned int _serial_vector_size;
- static const unsigned int _parallel_vector_size;
-};
-/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
-class CLLogits1DNormKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLogits1DNormKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: S32/F16/F32
- * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: S32/F16/F32
- * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
- *
- * @param[in] input Source tensor. Data types supported: S32/F16/F32
- * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_sum;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H */
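
The kernels above implement the usual numerically stable softmax decomposition. A scalar reference of the same three steps for the floating-point case; the placement of beta follows the "scaling factor for the exponent" wording above and is otherwise an assumption:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Pass 1 = CLLogits1DMaxKernel, pass 2 = CLLogits1DShiftExpSumKernel,
    // pass 3 = CLLogits1DNormKernel. Assumes a non-empty input row.
    std::vector<float> softmax_row(const std::vector<float> &logits, float beta = 1.0f)
    {
        const float max_val = *std::max_element(logits.begin(), logits.end());

        std::vector<float> out(logits.size());
        float sum = 0.0f;
        for (std::size_t i = 0; i < logits.size(); ++i)
        {
            out[i] = std::exp((logits[i] - max_val) * beta); // shift keeps exp() in range
            sum += out[i];
        }

        for (float &v : out)
        {
            v /= sum; // multiply by the inverse of the sum of the logits
        }
        return out;
    }
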
diff --git a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h
deleted file mode 100644
index 799b7b16c3..0000000000
--- a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
-#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the space to batch kernel */
-class CLSpaceToBatchLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToBatchLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToBatchLayerKernel(const CLSpaceToBatchLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToBatchLayerKernel &operator=(const CLSpaceToBatchLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToBatchLayerKernel(CLSpaceToBatchLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToBatchLayerKernel &operator=(CLSpaceToBatchLayerKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToBatchLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
- /** Initialise the kernel's input and output. (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
- /** Initialise the kernel's input and output. (Static block shape and paddings)
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- const ICLTensor *_block_shape; /**< Block shape tensor */
- const ICLTensor *_paddings; /**< Paddings tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H */
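
The output-shape arithmetic implied by the static-shape overloads, with the exact left/right padding split assumed from the parameter names:

    #include <cassert>

    struct Shape4D { int w, h, c, n; };

    Shape4D space_to_batch_shape(Shape4D in, int block_x, int block_y,
                                 int pad_lx, int pad_ly, int pad_rx, int pad_ry)
    {
        // Padded spatial extents must divide evenly by the block shape.
        assert((in.w + pad_lx + pad_rx) % block_x == 0);
        assert((in.h + pad_ly + pad_ry) % block_y == 0);
        return Shape4D{(in.w + pad_lx + pad_rx) / block_x,
                       (in.h + pad_ly + pad_ry) / block_y,
                       in.c,
                       in.n * block_x * block_y}; // batch grows by the block area
    }
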
diff --git a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h
deleted file mode 100644
index f2371e7d87..0000000000
--- a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
-#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the space to depth kernel */
-class CLSpaceToDepthLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToDepthLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthLayerKernel(const CLSpaceToDepthLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthLayerKernel &operator=(const CLSpaceToDepthLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthLayerKernel(CLSpaceToDepthLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthLayerKernel &operator=(CLSpaceToDepthLayerKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToDepthLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H */
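
A scalar reference of the rearrangement on an NCHW float buffer; the ordering of the block sub-positions within the output channels is an assumption, since the header does not pin it down:

    #include <vector>

    std::vector<float> space_to_depth(const std::vector<float> &in,
                                      int n, int c, int h, int w, int b)
    {
        const int oh = h / b, ow = w / b, oc = c * b * b;
        std::vector<float> out(in.size());
        for (int ni = 0; ni < n; ++ni)
            for (int ci = 0; ci < c; ++ci)
                for (int y = 0; y < h; ++y)
                    for (int x = 0; x < w; ++x)
                    {
                        // Each b x b spatial block moves into the channel dimension.
                        const int co = ci * b * b + (y % b) * b + (x % b);
                        const int yo = y / b, xo = x / b;
                        out[((ni * oc + co) * oh + yo) * ow + xo] =
                            in[((ni * c + ci) * h + y) * w + x];
                    }
        return out;
    }
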
diff --git a/arm_compute/core/CL/kernels/CLStackLayerKernel.h b/arm_compute/core/CL/kernels/CLStackLayerKernel.h
deleted file mode 100644
index e11c0a30d6..0000000000
--- a/arm_compute/core/CL/kernels/CLStackLayerKernel.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H
-#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension. */
-class CLStackLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLStackLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLStackLayerKernel(const CLStackLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLStackLayerKernel &operator=(const CLStackLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLStackLayerKernel(CLStackLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLStackLayerKernel &operator=(CLStackLayerKernel &&) = default;
- /** Default destructor */
- ~CLStackLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSTACKLAYERKERNEL_H */
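
Each kernel instance writes a single input slice, so a function-level stack configures one instance per input. A sketch, assuming inputs is a std::vector<const ICLTensor *> and all tensors are created and allocated by the surrounding code:

    // One kernel per input; slice i of the rank-(R+1) output receives inputs[i].
    std::vector<CLStackLayerKernel> kernels(inputs.size());
    for (unsigned int i = 0; i < inputs.size(); ++i)
    {
        kernels[i].configure(inputs[i], axis, i,
                             static_cast<unsigned int>(inputs.size()), &output);
    }
    // The kernels are then enqueued independently.
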
diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
deleted file mode 100644
index ebe1b38878..0000000000
--- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
-#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the kernel to perform tensor strided slicing */
-class CLStridedSliceKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLStridedSliceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLStridedSliceKernel(const CLStridedSliceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLStridedSliceKernel &operator=(const CLStridedSliceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLStridedSliceKernel(CLStridedSliceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLStridedSliceKernel &operator=(CLStridedSliceKernel &&) = default;
- /** Default destructor */
- ~CLStridedSliceKernel() = default;
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- */
- void configure(const ICLTensor *input, ICLTensor *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: All.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[in] output Destination tensor. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */
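
A simplified per-dimension resolution of the three masks documented above (positive indices only; negative-index wrapping omitted for brevity):

    #include <cstdint>

    struct DimSlice { int start; int end; int stride; };

    DimSlice resolve_dim(int i, int dim_size, int start, int end, int stride,
                         int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
    {
        DimSlice r{start, end, stride};
        if (begin_mask & (1 << i))
            r.start = (stride > 0) ? 0 : dim_size - 1; // fullest range: starts[i] ignored
        if (end_mask & (1 << i))
            r.end = (stride > 0) ? dim_size : -1;      // fullest range: ends[i] ignored
        if (shrink_axis_mask & (1 << i))
        {
            r.end    = r.start + 1;                    // keep a size-1 slice...
            r.stride = 1;                              // ...the dimension is dropped later
        }
        return r;
    }
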
diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h
deleted file mode 100644
index 24e333f164..0000000000
--- a/arm_compute/core/CL/kernels/CLTableLookupKernel.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
-#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-class ICLLut;
-
-/** Interface for the kernel to perform table lookup calculations. */
-class CLTableLookupKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] input An input tensor. Data types supported: U8, S16.
- * @param[in] lut The input LUT. Data types supported: U8, S16.
- * @param[out] output The output tensor. Data types supported: U8, S16.
- */
- void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8, S16.
- * @param[in] lut The input LUT. Data types supported: U8, S16.
- * @param[out] output The output tensor. Data types supported: U8, S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */
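
The element-wise semantics for the U8 case reduce to an indexed read of the LUT:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Each output value is the LUT entry selected by the corresponding input value.
    std::vector<uint8_t> table_lookup_u8(const std::vector<uint8_t> &input,
                                         const std::array<uint8_t, 256> &lut)
    {
        std::vector<uint8_t> out(input.size());
        for (std::size_t i = 0; i < input.size(); ++i)
        {
            out[i] = lut[input[i]];
        }
        return out;
    }
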
diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h
deleted file mode 100644
index 3db48706a3..0000000000
--- a/arm_compute/core/CL/kernels/CLThresholdKernel.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H
-#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the thresholding kernel.
- *
- */
-class CLThresholdKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold.
- * @param[in] false_value Value to set when the condition is not respected.
- * @param[in] true_value Value to set when the condition is respected.
- * @param[in] type Thresholding type. Either RANGE or BINARY.
- * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE.
- */
- void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
- uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
- /** Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold.
- * @param[in] false_value Value to set when the condition is not respected.
- * @param[in] true_value Value to set when the condition is respected.
- * @param[in] type Thresholding type. Either RANGE or BINARY.
- * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold,
- uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLTHRESHOLDKERNEL_H */
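
A scalar reference of the two documented modes; the strictness of the comparisons follows the OpenVX definitions and is an assumption, not taken from the kernel source:

    #include <cstdint>

    // BINARY: pass values strictly above the threshold.
    uint8_t threshold_binary(uint8_t v, uint8_t threshold,
                             uint8_t false_value, uint8_t true_value)
    {
        return (v > threshold) ? true_value : false_value;
    }

    // RANGE: pass values inside [lower, upper]; `threshold` above acts as `lower`.
    uint8_t threshold_range(uint8_t v, uint8_t lower, uint8_t upper,
                            uint8_t false_value, uint8_t true_value)
    {
        return (v > upper || v < lower) ? false_value : true_value;
    }
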
diff --git a/arm_compute/core/CL/kernels/CLTileKernel.h b/arm_compute/core/CL/kernels/CLTileKernel.h
deleted file mode 100644
index 68f3c929a6..0000000000
--- a/arm_compute/core/CL/kernels/CLTileKernel.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTILEKERNEL_H
-#define ARM_COMPUTE_CLTILEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a Tile operation */
-class CLTileKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLTileKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTileKernel(const CLTileKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTileKernel &operator=(const CLTileKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLTileKernel(CLTileKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLTileKernel &operator=(CLTileKernel &&) = default;
- /** Default destructor */
- ~CLTileKernel() = default;
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
- * @param[out] output Destination tensor. Same as @p input
- *
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
- /** Set the source, destination of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: All.
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
- * @param[out] output Destination tensor. Same as @p input
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
- /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel
- *
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
- * @param[in] output Destination tensor info. Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLTILEKERNEL_H */
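
The documented shape rule in code form: each output dimension is the input dimension times its entry in multiples, and element reads wrap around the input:

    #include <array>
    #include <cstddef>

    std::array<std::size_t, 4> tiled_shape(const std::array<std::size_t, 4> &in,
                                           const std::array<std::size_t, 4> &multiples)
    {
        std::array<std::size_t, 4> out{};
        for (std::size_t d = 0; d < 4; ++d)
            out[d] = in[d] * multiples[d]; // at most 4 dimensions, per the documentation
        return out;
    }

    // Per-dimension element mapping: out[i] = in[i % in_size].
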
diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h
deleted file mode 100644
index 09c9e3babf..0000000000
--- a/arm_compute/core/CL/kernels/CLTransposeKernel.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H
-#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel which transposes the elements of a matrix.
- *
- * [width, height, batch] -> [height, width, batch]
- *
- */
-class CLTransposeKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] output Output tensor. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLTRANSPOSEKERNEL_H */
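
The shape relation in the class comment can be made concrete with a small, hedged sketch (shapes assumed for illustration):

```cpp
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void transpose_example()
{
    // [width, height, batch] -> [height, width, batch]
    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 3U, 2U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(3U, 8U, 2U), 1, DataType::F32));

    CLTransposeKernel transpose;
    if(bool(CLTransposeKernel::validate(src.info(), dst.info())))
    {
        transpose.configure(&src, &dst); // 2D transpose applied per batch slice
    }
}
```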
diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
deleted file mode 100644
index e6b4209501..0000000000
--- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
-#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the UpsampleLayer kernel on OpenCL. */
-class CLUpsampleLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLUpsampleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default;
- /** Default move assignment operator */
- CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default;
- /** Default destructor */
- ~CLUpsampleLayerKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in]  info              Contains the upsampling factors on X and Y, described in @ref Size2D.
- * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in]  info              Contains the upsampling factors on X and Y, described in @ref Size2D.
- * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
- /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in]  info              Contains the upsampling factors on X and Y, described in @ref Size2D.
- * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Size2D _info;
- DataLayout _data_layout;
- unsigned int _num_elems_processed_per_iteration_input_x;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H */
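
A hedged usage sketch of the removed interface, assuming a 2x2 nearest-neighbour upsample (shapes illustrative):

```cpp
#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void upsample_example()
{
    // 2x2 upsampling: every input pixel fills a 2x2 block of the output.
    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));

    CLUpsampleLayerKernel upsample;
    upsample.configure(&src, &dst, Size2D(2U, 2U), InterpolationPolicy::NEAREST_NEIGHBOR);
}
```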
diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
deleted file mode 100644
index a21325e1c4..0000000000
--- a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H
-#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the warp affine kernel.*/
-class CLWarpAffineKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialize the function's source, destination and interpolation policy.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in]  matrix The affine matrix. Must be 2x3 of type float.
- *                    The matrix argument requires 9 values; the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
- /** Initialize the function's source, destination and interpolation policy.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in]  matrix          The affine matrix. Must be 2x3 of type float.
- *                             The matrix argument requires 9 values; the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */
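
A minimal sketch of the removed interface; since the header does not document the packing order of the 9-value array, the six affine coefficients are left as placeholders rather than asserting a layout:

```cpp
#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"

#include <array>

using namespace arm_compute;

void warp_affine_example()
{
    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    // First six entries: the 2x3 affine coefficients (packing order follows
    // the library's convention, which this header does not spell out); the
    // last three entries are ignored, per the parameter docs above.
    std::array<float, 9> matrix{};

    CLWarpAffineKernel warp;
    warp.configure(&src, &dst, matrix, InterpolationPolicy::BILINEAR);
}
```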
diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
deleted file mode 100644
index bb1a018a2b..0000000000
--- a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
-#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-/** Interface for the warp perspective kernel.*/
-class CLWarpPerspectiveKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialize the function's source, destination and interpolation policy.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
- /** Initialize the function's source, destination and interpolation policy.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */
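
The perspective variant differs from the affine kernel only in consuming all nine matrix values as a full 3x3 homography. A hedged sketch (identity transform, shapes illustrative):

```cpp
#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"

#include <array>

using namespace arm_compute;

void warp_perspective_example()
{
    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    // Identity homography: the diagonal entries (0, 4, 8) are 1 under either
    // row- or column-major packing, so the sketch stays convention-agnostic.
    std::array<float, 9> matrix{};
    matrix[0] = matrix[4] = matrix[8] = 1.f;

    CLWarpPerspectiveKernel warp;
    warp.configure(&src, &dst, matrix, InterpolationPolicy::NEAREST_NEIGHBOR);
}
```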
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
deleted file mode 100644
index 47e987b09b..0000000000
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
-#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer
- *
- * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
- * In combination with the @ref CLIm2ColKernel can transform a convolution to a matrix multiplication.
- *
- * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
- * @f[
- * \left( \begin{array}{ccc}
- * a000 & a001 & a002 \\
- * a010 & a011 & a012 \\
- * a020 & a021 & a022 \\
- * \end{array} \right)
- * \left( \begin{array}{ccc}
- * a100 & a101 & a102 \\
- * a110 & a111 & a112 \\
- * a120 & a121 & a122 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{cccccccccccccccccc}
- * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
- * \end{array} \right)
- * @f]
- */
-class CLWeightsReshapeKernel : public ICLKernel
-{
-public:
- /** Constructor.*/
- CLWeightsReshapeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default;
- /** Default destructor */
- ~CLWeightsReshapeKernel() = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
- * Data types supported: Same as @p input
- * @param[in]  num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for the NCHW data layout,
- *                        and the number of weights must be a multiple of num_groups.
- */
- void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
- * Data types supported: Same as @p input
- * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for the NCHW data layout,
- *                             and the number of weights must be a multiple of num_groups.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
- * Data types supported: Same as @p input
- * @param[in]  num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for the NCHW data layout,
- *                        and the number of weights must be a multiple of num_groups.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_biases;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H */
\ No newline at end of file
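
The row-linearisation described above implies a simple shape rule for the ungrouped case. The helper below is hypothetical, written only to mirror the documented behaviour for num_groups == 1:

```cpp
#include "arm_compute/core/TensorShape.h"

using namespace arm_compute;

// Hypothetical helper mirroring the documented reshape rule for the
// ungrouped, unshared-weights case (num_groups == 1).
TensorShape reshaped_weights_shape(const TensorShape &weights, bool append_bias)
{
    // [kernel_x, kernel_y, IFM, OFM] -> [kernel_x * kernel_y * IFM (+1 for bias), OFM]
    const size_t row = weights[0] * weights[1] * weights[2] + (append_bias ? 1 : 0);
    return TensorShape(row, weights[3]); // one linearised kernel per row
}

// The 3x3x2 example above: reshaped_weights_shape(TensorShape(3U, 3U, 2U, 4U), false)
// yields [18, 4] -- four kernels, each flattened to a row of 18 values.
```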
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
deleted file mode 100644
index a39ccc2869..0000000000
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the width concatenate kernel of 2 tensors.
- * The input1 and input2 tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate2TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate2TensorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate2TensorsKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */
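
A hedged validate-then-configure sketch (shapes illustrative); the 4-tensor variant that follows uses the same pattern with input3 and input4:

```cpp
#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void concat2_example()
{
    // Output width is the sum of the input widths; all other dims must match.
    CLTensor a, b, out;
    a.allocator()->init(TensorInfo(TensorShape(4U, 5U), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(6U, 5U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(10U, 5U), 1, DataType::F32));

    if(bool(CLWidthConcatenate2TensorsKernel::validate(a.info(), b.info(), out.info())))
    {
        CLWidthConcatenate2TensorsKernel concat;
        concat.configure(&a, &b, &out);
    }
}
```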
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
deleted file mode 100644
index 0e0eae6e85..0000000000
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the width concatenate kernel of 4 tensors.
- * All input tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate4TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate4TensorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate4TensorsKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[in] input3 Third input tensor. Data types supported: same as @p input1
- * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[in] input3 Third input tensor. Data types supported: same as @p input1
- * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] input3 Third tensor info. Data types supported: same as @p input1
- * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- const ICLTensor *_input3;
- const ICLTensor *_input4;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
deleted file mode 100644
index ef5851fa9a..0000000000
--- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the width concatenate kernel.
- * The input tensor is copied into the output tensor at the given width offset.
- */
-class CLWidthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _width_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */
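
The width_offset parameter is what lets a caller chain one kernel instance per input to concatenate an arbitrary number of tensors. A hedged sketch of that pattern:

```cpp
#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"

#include <memory>
#include <vector>

using namespace arm_compute;

void concat_n_example(const std::vector<CLTensor *> &inputs, CLTensor *output)
{
    // One kernel instance per input; width_offset advances by each input's width.
    std::vector<std::unique_ptr<CLWidthConcatenateLayerKernel>> kernels;
    unsigned int width_offset = 0;
    for(CLTensor *in : inputs)
    {
        auto k = std::make_unique<CLWidthConcatenateLayerKernel>();
        k->configure(in, width_offset, output);
        width_offset += in->info()->dimension(0); // dimension 0 is the width (X)
        kernels.emplace_back(std::move(k));
    }
    // The caller then enqueues each configured kernel in order.
}
```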
diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
deleted file mode 100644
index 5b2dc8cfc9..0000000000
--- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Winograd filter transform kernel. */
-class CLWinogradFilterTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWinogradFilterTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradFilterTransformKernel(const CLWinogradFilterTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradFilterTransformKernel &operator=(const CLWinogradFilterTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWinogradFilterTransformKernel(CLWinogradFilterTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWinogradFilterTransformKernel &operator=(CLWinogradFilterTransformKernel &&) = default;
- /** Default destructor */
- ~CLWinogradFilterTransformKernel() = default;
- /** Set the input and output tensor.
- *
- * @note Winograd filter transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd filter transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- */
- void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Set the input and output tensor.
- *
- * @note Winograd filter transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd filter transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
- *
- * @note Winograd filter transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd filter transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H */
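
A hedged sketch of building a WinogradInfo for one of the configurations listed above, F(4x4, 3x3) on NHWC; the shapes are illustrative and compute_winograd_filter_transform_shape() is the utility the docs above name:

```cpp
#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void winograd_filter_transform_example()
{
    // F(4x4, 3x3) on NHWC -- one of the supported configurations.
    const WinogradInfo winograd_info(Size2D(4U, 4U),            // output tile
                                     Size2D(3U, 3U),            // kernel size
                                     Size2D(56U, 56U),          // input spatial dims
                                     PadStrideInfo(1, 1, 1, 1), // unit strides only
                                     DataLayout::NHWC);

    // NHWC weights layout per the docs above: [IFM, kernel_x, kernel_y, OFM]
    TensorInfo weights_info(TensorShape(64U, 3U, 3U, 64U), 1, DataType::F32);
    weights_info.set_data_layout(DataLayout::NHWC);

    CLTensor weights, transformed;
    weights.allocator()->init(weights_info);
    transformed.allocator()->init(TensorInfo(
        misc::shape_calculator::compute_winograd_filter_transform_shape(weights_info, winograd_info),
        1, DataType::F32));

    CLWinogradFilterTransformKernel transform;
    transform.configure(&weights, &transformed, winograd_info);
}
```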
diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
deleted file mode 100644
index a305126f2d..0000000000
--- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform Winograd input transform.*/
-class CLWinogradInputTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWinogradInputTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradInputTransformKernel(const CLWinogradInputTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradInputTransformKernel &operator=(const CLWinogradInputTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWinogradInputTransformKernel(CLWinogradInputTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @note Winograd input transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd input transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input The input tensor to transform. Data types supported: F16/F32
- * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Set the input and output of the kernel.
- *
- * @note Winograd input transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd input transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to transform. Data types supported: F16/F32
- * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
- *
- * @note Winograd input transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd input transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input The input tensor to transform. Data types supported: F16/F32
- * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
-
- BorderSize _border_size;
- const ICLTensor *_input;
- ICLTensor *_output;
- DataLayout _data_layout;
- int _num_tiles_x;
- int _num_tiles_y;
- unsigned int _step_z;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H */
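
The private _num_tiles_x/_num_tiles_y members suggest the usual Winograd tiling bookkeeping; the arithmetic below illustrates that tiling for F(4x4, 3x3) and is not the kernel's literal code:

```cpp
// Illustrative tiling arithmetic for F(4x4, 3x3), unit stride, 'same' padding.
// Each 4x4 output tile needs a (4 + 3 - 1) x (4 + 3 - 1) = 6x6 input patch,
// and neighbouring patches overlap by (kernel - 1) = 2 pixels.
constexpr unsigned int out_w = 56, out_h = 56; // convolution output dims
constexpr unsigned int tile_w = 4, tile_h = 4; // output tile of F(4x4, 3x3)

constexpr unsigned int num_tiles_x = (out_w + tile_w - 1) / tile_w; // 14
constexpr unsigned int num_tiles_y = (out_h + tile_h - 1) / tile_h; // 14
constexpr unsigned int num_tiles   = num_tiles_x * num_tiles_y;     // 196 per feature map
static_assert(num_tiles == 196, "a 56x56 output covered by 4x4 tiles");
```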
diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
deleted file mode 100644
index 512b352637..0000000000
--- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Winograd output transform kernel. */
-class CLWinogradOutputTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWinogradOutputTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradOutputTransformKernel(const CLWinogradOutputTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradOutputTransformKernel &operator=(const CLWinogradOutputTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWinogradOutputTransformKernel(CLWinogradOutputTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWinogradOutputTransformKernel &operator=(CLWinogradOutputTransformKernel &&) = default;
- /** Default destructor */
- ~CLWinogradOutputTransformKernel() = default;
- /** Set the input and output tensor.
- *
- * @note Winograd output transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd output transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
- * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Set the input and output tensor.
- *
- * @note Winograd output transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd output transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
- * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
- *
- * @note Winograd output transform supports the following configurations for NCHW data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd output transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
- * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation @ref ActivationLayerInfo. Only RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LEAKY_RELU and SOFT_RELU supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
-
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
- bool _is_nhwc;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H */
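
A hedged sketch of fusing one of the supported activations into the output transform; the tensors and WinogradInfo are assumed to come from the earlier filter/input transform and GEMM stages of the pipeline:

```cpp
#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

// Fuses a bounded ReLU (one of the activations validate() lists as supported)
// into the final Winograd stage, avoiding a separate activation kernel.
void fuse_activation(const ICLTensor *gemm_out, const ICLTensor *bias, ICLTensor *dst,
                     const WinogradInfo &winograd_info)
{
    const ActivationLayerInfo act(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);
    CLWinogradOutputTransformKernel out_transform;
    out_transform.configure(gemm_out, bias, dst, winograd_info, act);
}
```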
diff --git a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h b/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h
deleted file mode 100644
index d0c4a9e417..0000000000
--- a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H
-#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the YOLO layer kernel that performs partial activation.
- * For each box, activate only:
- * - x and y position (channel 0 and 1 of each box)
- * - objectness (channel 4 of each box)
- * - classes (channels 5 to (5 + num_classes - 1) of each box)
- */
-class CLYOLOLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLYOLOLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLYOLOLayerKernel(const CLYOLOLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLYOLOLayerKernel &operator=(const CLYOLOLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLYOLOLayerKernel(CLYOLOLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLYOLOLayerKernel &operator=(CLYOLOLayerKernel &&) = default;
- /** Default destructor */
- ~CLYOLOLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in]      num_classes Number of classes to activate (must be a submultiple of the @p input channels)
- */
- void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in]      num_classes     Number of classes to activate (must be a submultiple of the @p input channels)
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in]  num_classes Number of classes to activate (must be a submultiple of the @p input channels)
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLYOLOLAYERKERNEL_H */
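For context, the partial activation described in the removed header can be sketched as a host-side channel predicate. This is a hypothetical helper assuming each box spans 5 + num_classes channels (4 coordinates, 1 objectness score, num_classes class scores); it is not the kernel's OpenCL code:

#include <cstdint>

// Sketch: should channel c of a box tensor be passed through the activation?
// Channels 2 and 3 (box width/height) are deliberately left untouched.
inline bool is_activated_channel(int32_t c, int32_t num_classes)
{
    const int32_t channels_per_box = 5 + num_classes;
    c %= channels_per_box;    // position within the current box
    return (c == 0 || c == 1) // x and y position
           || (c == 4)        // objectness
           || (c >= 5);       // class scores
}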
diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
deleted file mode 100644
index f0f7754960..0000000000
--- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
-#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
- */
-class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor */
- ICLDepthwiseConvolutionLayer3x3Kernel()
- : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false)
- {
- }
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Default Move Constructor. */
- ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Default move assignment operator */
- ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
-
-protected:
- BorderSize _border_size;
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_weights;
- const ICLTensor *_biases;
- unsigned int _conv_stride_y;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _is_quantized;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H */
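For readers without the removed header's context: the operation it configures applies one 3x3 filter per input channel. A scalar float reference, assuming a flattened [channel][row][column] layout, zero padding, depth_multiplier = 1 and hypothetical helper names (a sketch, not the library's implementation):

#include <cstddef>
#include <vector>

// Scalar reference of a 3x3 depthwise convolution (float, depth_multiplier = 1).
// in: [ifm][in_h][in_w] flattened, w: [ifm][3][3], bias: [ifm].
// Assumes in_h + 2 * pad >= 3 and in_w + 2 * pad >= 3.
void depthwise_conv3x3_ref(const std::vector<float> &in, const std::vector<float> &w,
                           const std::vector<float> &bias, std::vector<float> &out,
                           size_t ifm, size_t in_h, size_t in_w, size_t stride, size_t pad)
{
    const size_t out_h = (in_h + 2 * pad - 3) / stride + 1;
    const size_t out_w = (in_w + 2 * pad - 3) / stride + 1;
    out.assign(ifm * out_h * out_w, 0.f);

    for (size_t c = 0; c < ifm; ++c) // each channel uses its own 3x3 filter
        for (size_t oy = 0; oy < out_h; ++oy)
            for (size_t ox = 0; ox < out_w; ++ox)
            {
                float acc = bias[c];
                for (size_t ky = 0; ky < 3; ++ky)
                    for (size_t kx = 0; kx < 3; ++kx)
                    {
                        const long iy = static_cast<long>(oy * stride + ky) - static_cast<long>(pad);
                        const long ix = static_cast<long>(ox * stride + kx) - static_cast<long>(pad);
                        if (iy < 0 || ix < 0 || iy >= static_cast<long>(in_h) || ix >= static_cast<long>(in_w))
                            continue; // zero padding
                        acc += in[(c * in_h + iy) * in_w + ix] * w[(c * 3 + ky) * 3 + kx];
                    }
                out[(c * out_h + oy) * out_w + ox] = acc;
            }
}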
diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h
index c7b40baf22..f6f36596c4 100644
--- a/arm_compute/core/CPP/CPPKernels.h
+++ b/arm_compute/core/CPP/CPPKernels.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,11 +26,8 @@
/* Header regrouping all the CPP kernels */
#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
#include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h"
#include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
#include "arm_compute/core/CPP/kernels/CPPTopKVKernel.h"
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h"
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index d3f6fc944d..e5322bdcb1 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,111 +21,123 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPP_TYPES_H
-#define ARM_COMPUTE_CPP_TYPES_H
+#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
+#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
#include "arm_compute/core/Error.h"
-#include <array>
-#include <string>
-#include <vector>
+#include <memory>
namespace arm_compute
{
-/** CPU models - we only need to detect CPUs we have
- * microarchitecture-specific code for.
- *
- * Architecture features are detected via HWCAPs.
- */
-enum class CPUModel
+namespace cpuinfo
{
- GENERIC,
- GENERIC_FP16,
- GENERIC_FP16_DOT,
- A53,
- A55r0,
- A55r1
-};
-
-/** Global memory policy.
- * The functions in the runtime will use different strategies based on the policy currently set.
- *
- * MINIMIZE will try to reduce the amount allocated by the functions at the expense of performance normally.
- * NORMAL won't try to save any memory and will favor speed over memory consumption
+struct CpuIsaInfo;
+} // namespace cpuinfo
+
+#define ARM_COMPUTE_CPU_MODEL_LIST \
+ X(GENERIC) \
+ X(GENERIC_FP16) \
+ X(GENERIC_FP16_DOT) \
+ X(A53) \
+ X(A55r0) \
+ X(A55r1) \
+ X(A35) \
+ X(A73) \
+ X(A76) \
+ X(A510) \
+ X(X1) \
+ X(V1) \
+ X(A64FX) \
+ X(N1)
+
+/** CPU model types
*
+ * @note We only need to detect CPUs we have microarchitecture-specific code for.
+ * @note Architecture features are detected via HWCAPs.
*/
-enum class MemoryPolicy
+enum class CPUModel
{
- MINIMIZE,
- NORMAL
+#define X(model) model,
+ ARM_COMPUTE_CPU_MODEL_LIST
+#undef X
};
-/** Convert a cpumodel value to a string
- *
- * @param val CPUModel value to be converted
- *
- * @return String representing the corresponding CPUModel.
- */
-inline std::string cpu_model_to_string(CPUModel val)
-{
- switch(val)
- {
- case CPUModel::GENERIC:
- {
- return std::string("GENERIC");
- }
- case CPUModel::GENERIC_FP16:
- {
- return std::string("GENERIC_FP16");
- }
- case CPUModel::GENERIC_FP16_DOT:
- {
- return std::string("GENERIC_FP16_DOT");
- }
- case CPUModel::A53:
- {
- return std::string("A53");
- }
- case CPUModel::A55r0:
- {
- return std::string("A55r0");
- }
- case CPUModel::A55r1:
- {
- return std::string("A55r1");
- }
- default:
- {
- ARM_COMPUTE_ERROR("Invalid CPUModel.");
- return std::string("GENERIC");
- }
- }
-}
-
class CPUInfo final
{
-public:
- /** Constructor */
+protected:
CPUInfo();
+ ~CPUInfo();
- /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time
- * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it.
+public:
+ /** Access the CPUInfo singleton.
+ * This method has been deprecated and will be removed in future releases.
+ * @return The CPUInfo instance.
*/
- CPUInfo &operator=(const CPUInfo &cpuinfo) = delete;
- CPUInfo(const CPUInfo &cpuinfo) = delete;
- CPUInfo &operator=(CPUInfo &&cpuinfo) = default;
- CPUInfo(CPUInfo &&cpuinfo) = default;
+ static CPUInfo &get();
+
+ /* Delete move and copy constructors and assignment operators */
+ CPUInfo(CPUInfo const &) = delete; // Copy construct
+ CPUInfo(CPUInfo &&) = delete; // Move construct
+ CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign
+ CPUInfo &operator=(CPUInfo &&) = delete; // Move assign
/** Checks if the cpu model supports fp16.
*
- * @return true of the cpu supports fp16, false otherwise
+ * @return true if the cpu supports fp16, false otherwise
*/
bool has_fp16() const;
+ /** Checks if the cpu model supports bf16.
+ *
+ * @return true if the cpu supports bf16, false otherwise
+ */
+ bool has_bf16() const;
+ /** Checks if the cpu model supports SVE bf16.
+ *
+ * @return true if the cpu supports SVE bf16, false otherwise
+ */
+ bool has_svebf16() const;
/** Checks if the cpu model supports dot product.
*
- * @return true of the cpu supports dot product, false otherwise
+ * @return true if the cpu supports dot product, false otherwise
*/
bool has_dotprod() const;
+ /** Checks if the cpu model supports SVE floating-point matrix multiplication.
+ *
+ * @return true if the cpu supports SVE floating-point matrix multiplication, false otherwise
+ */
+ bool has_svef32mm() const;
+ /** Checks if the cpu model supports integer matrix multiplication.
+ *
+ * @return true if the cpu supports integer matrix multiplication, false otherwise
+ */
+ bool has_i8mm() const;
+ /** Checks if the cpu model supports SVE integer matrix multiplication.
+ *
+ * @return true if the cpu supports SVE integer matrix multiplication, false otherwise
+ */
+ bool has_svei8mm() const;
+ /** Checks if the cpu model supports sve.
+ *
+ * @return true if the cpu supports sve, false otherwise
+ */
+ bool has_sve() const;
+ /** Checks if the cpu model supports sve2.
+ *
+ * @return true if the cpu supports sve2, false otherwise
+ */
+ bool has_sve2() const;
+ /** Checks if the cpu model supports sme.
+ *
+ * @return true if the cpu supports sme, false otherwise
+ */
+ bool has_sme() const;
+ /** Checks if the cpu model supports sme2.
+ *
+ * @return true if the cpu supports sme2, false otherwise
+ */
+ bool has_sme2() const;
/** Gets the cpu model for a given cpuid.
*
* @param[in] cpuid the id of the cpu core to be retrieved,
@@ -138,6 +150,11 @@ public:
* @return Current thread's @ref CPUModel
*/
CPUModel get_cpu_model() const;
+ /** Gets the current cpu's ISA information
+ *
+ * @return Current cpu's ISA information
+ */
+ cpuinfo::CpuIsaInfo get_isa() const;
/** Gets the L1 cache size
*
* @return the size of the L1 cache
@@ -148,85 +165,41 @@ public:
 * @return the size of the L2 cache
*/
unsigned int get_L2_cache_size() const;
- /** Set the L1 cache size
- *
- * @param[in] size the new size to be set.
- */
- void set_L1_cache_size(unsigned int size);
- /** Set the L2 cache size
- *
- * @param[in] size the new size to be set.
- */
- void set_L2_cache_size(unsigned int size);
- /** Set fp16 support
- *
- * @param[in] fp16 whether the cpu supports fp16.
- */
- void set_fp16(const bool fp16);
- /** Set dot product support
- *
- * @param[in] dotprod whether the cpu supports dot product.
- */
- void set_dotprod(const bool dotprod);
- /** Set the cpumodel for a given cpu core
- *
- * @param[in] cpuid the id of the core to be set.
- * @param[in] model the @ref CPUModel to be set.
- */
- void set_cpu_model(unsigned int cpuid, CPUModel model);
- /** Set max number of cpus
- *
- * @param[in] cpu_count the number of CPUs in the system.
- */
- void set_cpu_num(unsigned int cpu_count);
-
/** Return the maximum number of CPUs present
*
* @return Number of CPUs
*/
unsigned int get_cpu_num() const;
-
-private:
- std::vector<CPUModel> _percpu = {};
- bool _fp16 = false;
- bool _dotprod = false;
- unsigned int _L1_cache_size = 32768;
- unsigned int _L2_cache_size = 262144;
-};
-
-class MEMInfo final
-{
-public:
- MEMInfo();
-
- /** Return the total amount of RAM memory in the system expressed in KB.
+ /** Return the maximum number of CPUs present excluding the little cores
+ * in case of an Android device
*
- * @return Total memory
+ * @return Number of CPUs excluding little
*/
- size_t get_total_in_kb() const;
-
- static void set_policy(MemoryPolicy policy);
- static MemoryPolicy get_policy();
+ unsigned int get_cpu_num_excluding_little() const;
+ /** Return whether the device has little, medium and big CPUs in case
+ * of an Android device, returns false otherwise
+ *
+ * @return Whether the device has little, medium and big CPUs
+ */
+ bool cpu_has_little_mid_big() const;
- /** Common memory sizes expressed in Kb to avoid having them
- * duplicated throughout the code.
+ /** Return the vector length in bytes for sme2
+ *
+ * @return Vector length if sme2 is enabled, otherwise returns 0.
*/
- static const size_t ONE_GB_IN_KB = { 1035842 };
- static const size_t TWO_GB_IN_KB = { ONE_GB_IN_KB * 2 };
+ unsigned long get_sme2_vector_length() const;
private:
- size_t _total;
- size_t _free;
- size_t _buffer;
- static MemoryPolicy _policy;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
/** Information about executing thread and CPU. */
struct ThreadInfo
{
- int thread_id{ 0 };
- int num_threads{ 1 };
- const CPUInfo *cpu_info{ nullptr };
+ int thread_id{0};
+ int num_threads{1};
+ const CPUInfo *cpu_info{nullptr};
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPP_TYPES_H */
+#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
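The hand-written cpu_model_to_string() switch removed above is exactly the kind of code the new ARM_COMPUTE_CPU_MODEL_LIST X-macro makes regenerable. A sketch of the technique (this function is not provided by the header itself):

#include <string>

#include "arm_compute/core/CPP/CPPTypes.h"

// Expand the X-macro once per enumerator to rebuild the name mapping;
// adding a model to ARM_COMPUTE_CPU_MODEL_LIST updates this automatically.
inline std::string cpu_model_to_string(arm_compute::CPUModel val)
{
    switch (val)
    {
#define X(model)                       \
    case arm_compute::CPUModel::model: \
        return #model;
        ARM_COMPUTE_CPU_MODEL_LIST
#undef X
        default:
            return "GENERIC";
    }
}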
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index ec05af20bd..03967a536d 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,16 +25,21 @@
#define ARM_COMPUTE_ICPPKERNEL_H
#include "arm_compute/core/CPP/CPPTypes.h"
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/IKernel.h"
+#include "arm_compute/core/Types.h"
namespace arm_compute
{
class Window;
+class ITensor;
/** Common interface for all kernels implemented in C++ */
class ICPPKernel : public IKernel
{
public:
+ static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */
+
/** Default destructor */
virtual ~ICPPKernel() = default;
@@ -51,8 +56,7 @@ public:
*/
virtual void run(const Window &window, const ThreadInfo &info)
{
- ARM_COMPUTE_UNUSED(window);
- ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_UNUSED(window, info);
ARM_COMPUTE_ERROR("default implementation of legacy run() virtual member function invoked");
}
@@ -69,6 +73,37 @@ public:
run(window, info);
}
+ /** Execute the kernel on the passed window
+ *
+ * @warning If is_parallelisable() returns false then the passed window must be equal to window()
+ *
+ * @note The window has to be a region within the window returned by the window() method
+ *
+ * @note The width of the window has to be a multiple of num_elems_processed_per_iteration().
+ *
+ * @param[in] tensors A vector containing the tensors to operate on.
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ */
+ virtual void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+ {
+ ARM_COMPUTE_UNUSED(tensors, window, info);
+ }
+
+ /** Return minimum workload size of the relevant kernel
+ *
+ * @param[in] platform The CPU platform used to create the context.
+ * @param[in] thread_count Number of threads in the execution.
+ *
+ * @return Minimum workload size for requested configuration.
+ */
+ virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const
+ {
+ ARM_COMPUTE_UNUSED(platform, thread_count);
+
+ return default_mws;
+ }
+
/** Name of the kernel
*
* @return Kernel name
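A scheduler can use the new get_mws() hook to avoid splitting a small workload across too many threads. A hypothetical caller (not the library's scheduler) might clamp the thread count as follows:

#include <algorithm>
#include <cstddef>

#include "arm_compute/core/CPP/ICPPKernel.h"

// Sketch: cap the number of threads so no thread receives less than the
// kernel's minimum workload size (e.g. total_workload = window rows).
inline size_t effective_threads(const arm_compute::ICPPKernel &kernel,
                                const arm_compute::CPUInfo    &platform,
                                size_t                         total_workload,
                                size_t                         available_threads)
{
    const size_t mws = std::max<size_t>(1, kernel.get_mws(platform, available_threads));
    return std::max<size_t>(1, std::min(available_threads, total_workload / mws));
}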
diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h
deleted file mode 100644
index acdd054c0e..0000000000
--- a/arm_compute/core/CPP/ICPPSimpleKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICPPSIMPLEKERNEL_H
-#define ARM_COMPUTE_ICPPSIMPLEKERNEL_H
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */
-class ICPPSimpleKernel : public ICPPKernel
-{
-public:
- /** Constructor */
- ICPPSimpleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICPPSimpleKernel(const ICPPSimpleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete;
- /** Allow instances of this class to be moved */
- ICPPSimpleKernel(ICPPSimpleKernel &&) = default;
- /** Allow instances of this class to be moved */
- ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default;
- /** Default destructor */
- ~ICPPSimpleKernel() = default;
-
-protected:
- /** Configure the kernel
- *
- * @param[in] input Source tensor.
- * @param[out] output Destination tensor.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
- /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel.
- *
- * @param[in] input Source tensor info.
- * @param[in] output Destination tensor info.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
- bool border_undefined = false, const BorderSize &border_size = BorderSize());
-
-protected:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ICPPSIMPLEKERNEL_H */
diff --git a/arm_compute/core/CPP/Validate.h b/arm_compute/core/CPP/Validate.h
deleted file mode 100644
index dfee9de86e..0000000000
--- a/arm_compute/core/CPP/Validate.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPP_VALIDATE_H
-#define ARM_COMPUTE_CPP_VALIDATE_H
-
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor_info Tensor info to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
-#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16,
- function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above");
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- return Status {};
-}
-
-/** Return an error if the data type of the passed tensor info is BFLOAT16 and BFLOAT16 support is not compiled in.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor_info Tensor info to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
-#if !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16))
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::BFLOAT16,
- function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above");
-#endif /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) */
- return Status {};
-}
-
-/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor Tensor to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line,
- const ITensor *tensor)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info()));
- return Status{};
-}
-
-/** Return an error if the data type of the passed tensor is BFLOAT16 and BFLOAT16 support is not compiled in.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor Tensor to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line,
- const ITensor *tensor)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info()));
- return Status{};
-}
-
-#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor))
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor))
-
-#define ARM_COMPUTE_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor))
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor))
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPP_VALIDATE_H */
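The macros removed here all reduce to one compile-time pattern: F16 (or BFLOAT16) tensors are rejected unless the corresponding vector arithmetic was compiled in. A standalone sketch of the FP16 case, using only public types (not a replacement API):

#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensorInfo.h"

// Sketch: reject F16 data on builds without fp16 vector arithmetic support.
inline arm_compute::Status check_fp16_supported(const arm_compute::ITensorInfo *info)
{
#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    if (info != nullptr && info->data_type() == arm_compute::DataType::F16)
    {
        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "F16 needs an Armv8.2-A or later build");
    }
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
    return arm_compute::Status{};
}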
diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
index 3fa83a6d6d..dd91595ea6 100644
--- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,7 @@
#ifndef ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H
#define ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
@@ -62,11 +60,19 @@ public:
* @param[out] classes The classes output tensor of size [N]. Data types supported: Same as @p scores_in
* @param[out] batch_splits_out (Optional) The batch splits output tensor [batch_size]. Data types supported: Same as @p scores_in
* @param[out] keeps (Optional) The keeps output tensor of size [N]. Data types supported: Same as@p scores_in
- * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: Same as @p scores_in
+ * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32
* @param[in] info (Optional) BoxNMSLimitInfo information.
*/
- void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
- ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo());
+ void configure(const ITensor *scores_in,
+ const ITensor *boxes_in,
+ const ITensor *batch_splits_in,
+ ITensor *scores_out,
+ ITensor *boxes_out,
+ ITensor *classes,
+ ITensor *batch_splits_out = nullptr,
+ ITensor *keeps = nullptr,
+ ITensor *keeps_size = nullptr,
+ const BoxNMSLimitInfo info = BoxNMSLimitInfo());
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -76,9 +82,9 @@ public:
void run_nmslimit();
private:
- const ITensor *_scores_in;
- const ITensor *_boxes_in;
- const ITensor *_batch_splits_in;
+ const ITensor *_scores_in;
+ const ITensor *_boxes_in;
+ const ITensor *_batch_splits_in;
ITensor *_scores_out;
ITensor *_boxes_out;
ITensor *_classes;
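Both this kernel and CPPNonMaximumSuppressionKernel further down implement variants of greedy non-maxima suppression: keep the best-scoring box, suppress every remaining box that overlaps it beyond an IoU threshold, repeat. A compact sketch with hypothetical types (the kernels themselves add per-class handling and output limits):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

struct Box { float x1, y1, x2, y2; };

// Intersection-over-union of two axis-aligned boxes.
static float iou(const Box &a, const Box &b)
{
    const float ix     = std::max(0.f, std::min(a.x2, b.x2) - std::max(a.x1, b.x1));
    const float iy     = std::max(0.f, std::min(a.y2, b.y2) - std::max(a.y1, b.y1));
    const float inter  = ix * iy;
    const float area_a = (a.x2 - a.x1) * (a.y2 - a.y1);
    const float area_b = (b.x2 - b.x1) * (b.y2 - b.y1);
    return inter / (area_a + area_b - inter);
}

// Return indices of kept boxes, highest score first.
std::vector<size_t> nms(const std::vector<Box> &boxes, const std::vector<float> &scores, float iou_threshold)
{
    std::vector<size_t> order(boxes.size());
    std::iota(order.begin(), order.end(), 0);
    std::sort(order.begin(), order.end(), [&](size_t a, size_t b) { return scores[a] > scores[b]; });

    std::vector<size_t> keep;
    std::vector<bool>   suppressed(boxes.size(), false);
    for (size_t i : order)
    {
        if (suppressed[i])
            continue;
        keep.push_back(i);
        for (size_t j : order)
            if (!suppressed[j] && j != i && iou(boxes[i], boxes[j]) > iou_threshold)
                suppressed[j] = true;
    }
    return keep;
}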
diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
deleted file mode 100644
index eeb6a65525..0000000000
--- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H
-#define ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-/** Interface for CPP Images. */
-using IImage = ITensor;
-
-/** CPP kernel to perform corner candidates
- */
-class CPPCornerCandidatesKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "CPPCornerCandidatesKernel";
- }
- /** Default constructor */
- CPPCornerCandidatesKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPCornerCandidatesKernel(const CPPCornerCandidatesKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPCornerCandidatesKernel &operator=(const CPPCornerCandidatesKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- CPPCornerCandidatesKernel(CPPCornerCandidatesKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- CPPCornerCandidatesKernel &operator=(CPPCornerCandidatesKernel &&) = delete;
- /** Default destructor */
- ~CPPCornerCandidatesKernel() = default;
-
- /** Setup the kernel parameters
- *
- * @param[in] input Source image (harris score). Format supported F32
- * @param[out] output Destination array of InternalKeypoint
- * @param[out] num_corner_candidates Number of corner candidates
- */
- void configure(const IImage *input, InternalKeypoint *output, int32_t *num_corner_candidates);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- int32_t *_num_corner_candidates; /**< Number of corner candidates */
- arm_compute::Mutex _corner_candidates_mutex; /**< Mutex to preventing race conditions */
- const IImage *_input; /**< Source image - Harris score */
- InternalKeypoint *_output; /**< Array of NEInternalKeypoint */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H */
diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h
deleted file mode 100644
index cf8e4f00b9..0000000000
--- a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H
-#define ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** CPP kernel to perform in-place computation of euclidean distance on IDetectionWindowArray
- *
- * @note This kernel is meant to be used alongside HOG or other object detection algorithms to perform a non-maxima suppression on a
- * IDetectionWindowArray
- */
-class CPPDetectionWindowNonMaximaSuppressionKernel : public ICPPKernel
-{
-public:
- const char *name() const override
- {
- return "CPPDetectionWindowNonMaximaSuppressionKernel";
- }
- /** Default constructor */
- CPPDetectionWindowNonMaximaSuppressionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPDetectionWindowNonMaximaSuppressionKernel(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPDetectionWindowNonMaximaSuppressionKernel &operator=(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete;
- /** Allow instances of this class to be moved */
- CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default;
- /** Allow instances of this class to be moved */
- CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default;
- /** Initialise the kernel's input, output and the euclidean minimum distance
- *
- * @attention: If @ref IDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref IDetectionWindowArray must be called respectively before and after
- * the run() method of @ref CPPDetectionWindowNonMaximaSuppressionKernel
- *
- * @param[in, out] input_output Input/Output array of @ref DetectionWindow
- * @param[in] min_distance Radial Euclidean distance for non-maxima suppression
- */
- void configure(IDetectionWindowArray *input_output, float min_distance);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- IDetectionWindowArray *_input_output;
- float _min_distance;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H */
diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
index cb416af070..d1f7f8670f 100644
--- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
#define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
namespace arm_compute
{
@@ -65,7 +64,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold);
+ void configure(const ITensor *input_bboxes,
+ const ITensor *input_scores,
+ ITensor *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
/** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel
*
@@ -77,8 +81,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size,
- const float score_threshold, const float iou_threshold);
+ static Status validate(const ITensorInfo *input_bboxes,
+ const ITensorInfo *input_scores,
+ const ITensorInfo *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h
index e75152f4ea..d141c2fb70 100644
--- a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,15 +56,15 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32
- * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] input The input tensor to permute. Data types supported: All.
+ * @param[out] output The output tensor. Data types supported: same as @p input
* @param[in] perm Permutation vector
*/
void configure(const ITensor *input, ITensor *output, const PermutationVector &perm);
/** Static function to check if given info will lead to a valid configuration of @ref CPPPermuteKernel
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32
- * @param[in] output The output tensor. Data types supported: Same as @p input
+ * @param[in] input The input tensor to permute. Data types supported: All.
+ * @param[in] output The output tensor. Data types supported: same as @p input
* @param[in] perm Permutation vector
*
* @return a status
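The permutation semantics assumed throughout: destination dimension i takes its extent (and, per element, its coordinate) from source dimension perm[i]. A small sketch on plain shapes, with hypothetical names:

#include <array>
#include <cstddef>

// Sketch: apply a permutation vector to a shape; out[i] = shape[perm[i]].
template <size_t N>
std::array<size_t, N> permute_shape(const std::array<size_t, N> &shape,
                                    const std::array<size_t, N> &perm)
{
    std::array<size_t, N> out{};
    for (size_t i = 0; i < N; ++i)
    {
        out[i] = shape[perm[i]];
    }
    return out;
}
// e.g. shape {W, H, C} with perm {2, 0, 1} yields {C, W, H}.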
diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h
deleted file mode 100644
index d127ef8d8a..0000000000
--- a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H
-#define ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-#include "arm_compute/core/IArray.h"
-
-#include <cstdint>
-#include <mutex>
-
-namespace arm_compute
-{
-/** CPP kernel to perform sorting and euclidean distance */
-class CPPSortEuclideanDistanceKernel : public ICPPKernel
-{
-public:
- const char *name() const override
- {
- return "CPPSortEuclideanDistanceKernel";
- }
- /** Default constructor */
- CPPSortEuclideanDistanceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPSortEuclideanDistanceKernel(const CPPSortEuclideanDistanceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPSortEuclideanDistanceKernel &operator=(const CPPSortEuclideanDistanceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CPPSortEuclideanDistanceKernel(CPPSortEuclideanDistanceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CPPSortEuclideanDistanceKernel &operator=(CPPSortEuclideanDistanceKernel &&) = default;
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in,out] in_out Input internal keypoints. Marked as out as the kernel writes 0 in the strength member.
- * @param[out] output Output keypoints.
- * @param[in] num_corner_candidates Pointer to the number of corner candidates in the input array
- * @param[in] min_distance Radial Euclidean distance to use
- */
- void configure(InternalKeypoint *in_out, IKeyPointArray *output, const int32_t *num_corner_candidates, float min_distance);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- const int32_t *_num_corner_candidates; /**< Number of corner candidates */
- float _min_distance; /**< Radial Euclidean distance */
- InternalKeypoint *_in_out; /**< Source array of InternalKeypoint */
- IKeyPointArray *_output; /**< Destination array of IKeyPointArray */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H */
diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
index 4b9bfdd3c9..7326a10e2f 100644
--- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,7 +54,7 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] predictions A batch_size x classes tensor. Data types supported: F16/S32/F32/QASYMM8/QASYMM8_SIGNED
- * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: S32
+ * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32
* @param[out] output Computed precision at @p k as a bool 1D tensor. Data types supported: U8
* @param[in] k Number of top elements to look at for computing precision.
*/
@@ -63,13 +63,14 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CPPTopKVKernel
*
* @param[in] predictions A batch_size x classes tensor info. Data types supported: F16/S32/F32/QASYMM8/QASYMM8_SIGNED
- * @param[in] targets A batch_size 1D tensor info of class ids. Data types supported: S32
+ * @param[in] targets A batch_size 1D tensor info of class ids. Data types supported: U32
* @param[in] output Computed precision at @p k as a bool 1D tensor info. Data types supported: U8
* @param[in] k Number of top elements to look at for computing precision.
*
* @return a status
*/
- static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
+ static Status
+ validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
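In effect the kernel reports, per batch row, whether the target class's score ranks within the top k predictions. A scalar sketch with hypothetical names (ties are counted in the target's favour here; the header does not specify the kernel's exact tie-breaking):

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch: out[b] = 1 iff predictions[b * classes + targets[b]] is among the
// k largest values of row b.
std::vector<uint8_t> top_k_v(const std::vector<float>    &predictions, // [batch * classes]
                             const std::vector<uint32_t> &targets,     // [batch]
                             size_t classes, size_t k)
{
    const size_t         batch = targets.size();
    std::vector<uint8_t> out(batch, 0);
    for (size_t b = 0; b < batch; ++b)
    {
        const float target_val       = predictions[b * classes + targets[b]];
        size_t      strictly_greater = 0;
        for (size_t c = 0; c < classes; ++c)
            if (predictions[b * classes + c] > target_val)
                ++strictly_greater;
        out[b] = (strictly_greater < k) ? 1 : 0;
    }
    return out;
}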
diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
index 9fbc9b697c..dd7e07c390 100644
--- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,8 +55,8 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED
- * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] input The input tensor to upsample. Data types supported: All.
+ * @param[out] output The output tensor. Data types supported: same as @p input.
* @param[in] info Padding info.
*/
void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
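This kernel sits on the deconvolution path, where the input is scaled up by writing each element stride positions apart into a zero-initialised output. A single-channel sketch that ignores padding (hypothetical helper, not the kernel's code):

#include <cstddef>
#include <vector>

// Sketch: zero-stuff a 2D plane so consecutive input elements land
// stride_x / stride_y positions apart in the output.
std::vector<float> upsample2d(const std::vector<float> &in, size_t h, size_t w,
                              size_t stride_x, size_t stride_y)
{
    const size_t       out_w = w * stride_x;
    const size_t       out_h = h * stride_y;
    std::vector<float> out(out_w * out_h, 0.f);
    for (size_t y = 0; y < h; ++y)
        for (size_t x = 0; x < w; ++x)
            out[(y * stride_y) * out_w + x * stride_x] = in[y * w + x];
    return out;
}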
diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h
index 78ca5250ab..d1240bb10a 100644
--- a/arm_compute/core/Coordinates.h
+++ b/arm_compute/core/Coordinates.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,8 +42,7 @@ public:
* @param[in] coords Values to initialize the dimensions.
*/
template <typename... Ts>
- constexpr Coordinates(Ts... coords)
- : Dimensions{ coords... }
+ constexpr Coordinates(Ts... coords) : Dimensions{coords...}
{
}
/** Allow instances of this class to be copy constructed */
@@ -57,5 +56,5 @@ public:
/** Default destructor */
~Coordinates() = default;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_COORDINATES_H*/
diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h
new file mode 100644
index 0000000000..1a9db1937c
--- /dev/null
+++ b/arm_compute/core/CoreTypes.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_CORE_CORETYPES
+#define ACL_ARM_COMPUTE_CORE_CORETYPES
+
+#include "arm_compute/core/Strides.h"
+
+#include "support/Half.h"
+
+/** CoreTypes.h groups together essential small types that are used across functions */
+
+namespace arm_compute
+{
+/** 16-bit floating point type */
+using half = half_float::half;
+/** Permutation vector */
+using PermutationVector = Strides;
+
+/** Available channels */
+enum class Channel
+{
+ UNKNOWN, /**< Unknown channel format */
+ C0, /**< First channel (used by formats with unknown channel types). */
+ C1, /**< Second channel (used by formats with unknown channel types). */
+ C2, /**< Third channel (used by formats with unknown channel types). */
+ C3, /**< Fourth channel (used by formats with unknown channel types). */
+ R, /**< Red channel. */
+ G, /**< Green channel. */
+ B, /**< Blue channel. */
+ A, /**< Alpha channel. */
+ Y, /**< Luma channel. */
+ U, /**< Cb/U channel. */
+ V /**< Cr/V/Value channel. */
+};
+
+/** Image colour formats */
+enum class Format
+{
+ UNKNOWN, /**< Unknown image format */
+ U8, /**< 1 channel, 1 U8 per channel */
+ S16, /**< 1 channel, 1 S16 per channel */
+ U16, /**< 1 channel, 1 U16 per channel */
+ S32, /**< 1 channel, 1 S32 per channel */
+ U32, /**< 1 channel, 1 U32 per channel */
+ S64, /**< 1 channel, 1 S64 per channel */
+ U64, /**< 1 channel, 1 U64 per channel */
+ BFLOAT16, /**< 16-bit brain floating-point number */
+ F16, /**< 1 channel, 1 F16 per channel */
+ F32, /**< 1 channel, 1 F32 per channel */
+ UV88, /**< 2 channel, 1 U8 per channel */
+ RGB888, /**< 3 channels, 1 U8 per channel */
+ RGBA8888, /**< 4 channels, 1 U8 per channel */
+ YUV444, /**< A 3 plane of 8-bit 4:4:4 sampled Y, U, V planes */
+ YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
+ NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
+ NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
+ IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
+ UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes */
+};
+
+/** Available data types */
+enum class DataType
+{
+ UNKNOWN, /**< Unknown data type */
+ U8, /**< unsigned 8-bit number */
+ S8, /**< signed 8-bit number */
+ QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */
+ QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */
+ QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */
+ QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
+ U16, /**< unsigned 16-bit number */
+ S16, /**< signed 16-bit number */
+ QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */
+ QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */
+ U32, /**< unsigned 32-bit number */
+ S32, /**< signed 32-bit number */
+ U64, /**< unsigned 64-bit number */
+ S64, /**< signed 64-bit number */
+ BFLOAT16, /**< 16-bit brain floating-point number */
+ F16, /**< 16-bit floating-point number */
+ F32, /**< 32-bit floating-point number */
+ F64, /**< 64-bit floating-point number */
+ SIZET /**< size_t */
+};
+
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layouts */
+enum class DataLayout
+{
+ UNKNOWN, /**< Unknown data layout */
+ NCHW, /**< Num samples, channels, height, width */
+ NHWC, /**< Num samples, height, width, channels */
+ NCDHW, /**< Num samples, channels, depth, height, width */
+ NDHWC /**< Num samples, depth, height, width, channels */
+};
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layout dimensions */
+enum class DataLayoutDimension
+{
+ CHANNEL, /**< channel */
+ HEIGHT, /**< height */
+ WIDTH, /**< width */
+ DEPTH, /**< depth */
+ BATCHES /**< batches */
+};
+
+/** Dimension rounding type when down-scaling on CNNs
+ * @note Used in pooling and convolution layers
+ */
+enum class DimensionRoundingType
+{
+ FLOOR, /**< Floor rounding */
+ CEIL /**< Ceil rounding */
+};
+
+class PadStrideInfo
+{
+public:
+ /** Constructor
+ *
+ * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
+ * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
+ * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0.
+ * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0.
+ * @param[in] round (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
+ */
+ PadStrideInfo(unsigned int stride_x = 1,
+ unsigned int stride_y = 1,
+ unsigned int pad_x = 0,
+ unsigned int pad_y = 0,
+ DimensionRoundingType round = DimensionRoundingType::FLOOR)
+ : _stride(std::make_pair(stride_x, stride_y)),
+ _pad_left(pad_x),
+ _pad_top(pad_y),
+ _pad_right(pad_x),
+ _pad_bottom(pad_y),
+ _round_type(round)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] stride_x Stride, in elements, across x.
+ * @param[in] stride_y Stride, in elements, across y.
+ * @param[in] pad_left Padding across x on the left, in elements.
+ * @param[in] pad_right Padding across x on the right, in elements.
+ * @param[in] pad_top Padding across y on the top, in elements.
+ * @param[in] pad_bottom Padding across y on the bottom, in elements.
+ * @param[in] round Dimensions rounding.
+ */
+ PadStrideInfo(unsigned int stride_x,
+ unsigned int stride_y,
+ unsigned int pad_left,
+ unsigned int pad_right,
+ unsigned int pad_top,
+ unsigned int pad_bottom,
+ DimensionRoundingType round)
+ : _stride(std::make_pair(stride_x, stride_y)),
+ _pad_left(pad_left),
+ _pad_top(pad_top),
+ _pad_right(pad_right),
+ _pad_bottom(pad_bottom),
+ _round_type(round)
+ {
+ }
+ /** Get the stride.
+ *
+ * @return a pair: stride x, stride y.
+ */
+ std::pair<unsigned int, unsigned int> stride() const
+ {
+ return _stride;
+ }
+ /** Check whether the padding is symmetric.
+ *
+ * @return True if the padding is symmetric.
+ */
+ bool padding_is_symmetric() const
+ {
+ return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
+ }
+ /** Get the padding.
+ *
+ * @note This should only be used when the padding is symmetric.
+ *
+ * @return a pair: padding left/right, padding top/bottom
+ */
+ std::pair<unsigned int, unsigned int> pad() const
+ {
+ // This accessor should be used only when padding is symmetric.
+ ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
+ return std::make_pair(_pad_left, _pad_top);
+ }
+
+ /** Get the left padding */
+ unsigned int pad_left() const
+ {
+ return _pad_left;
+ }
+ /** Get the right padding */
+ unsigned int pad_right() const
+ {
+ return _pad_right;
+ }
+ /** Get the top padding */
+ unsigned int pad_top() const
+ {
+ return _pad_top;
+ }
+ /** Get the bottom padding */
+ unsigned int pad_bottom() const
+ {
+ return _pad_bottom;
+ }
+
+ /** Get the rounding type */
+ DimensionRoundingType round() const
+ {
+ return _round_type;
+ }
+
+ /** Check whether this has any padding */
+ bool has_padding() const
+ {
+ return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
+ }
+
+private:
+ std::pair<unsigned int, unsigned int> _stride;
+ unsigned int _pad_left;
+ unsigned int _pad_top;
+ unsigned int _pad_right;
+ unsigned int _pad_bottom;
+
+ DimensionRoundingType _round_type;
+};
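
Usage note: pad() asserts when the padding is asymmetric, so the per-side accessors are the safe default. For example:

// Stride 2, asymmetric "SAME"-style padding: 0 left/top, 1 right/bottom.
PadStrideInfo conv_info(2, 2, /*pad_left*/ 0, /*pad_right*/ 1, /*pad_top*/ 0, /*pad_bottom*/ 1,
                        DimensionRoundingType::FLOOR);
// conv_info.padding_is_symmetric() == false, so conv_info.pad() would assert;
// query the sides individually instead:
const unsigned int pr = conv_info.pad_right(); // 1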
+
+/** Memory layouts for the weights tensor.
+ *
+ * * UNSPECIFIED is used to select kernels that do not run in
+ * variable weights mode.
+ *
+ * * ANY is used to query the kernel database to retrieve any of the
+ * kernels that run in variable weights mode. Once a kernel is
+ * found, the specific format expected by the kernel can be
+ * retrieved by the user for reordering the weights tensor
+ * accordingly.
+ *
+ * The other values OHWIo{interleave_by}i{block_by} describe the
+ * memory layout of a 4D tensor with layout OHWI that has been
+ * transformed into a 4D tensor with dimensions O'HWI' where:
+ *
+ * O' = first multiple of {interleave_by} s.t. O<=O'
+ * I' = first multiple of {block_by} s.t. I<=I'
+ *
+ * The total size of the dst tensor is O' x H x W x I'
+ *
+ * The access function of the tensor with layout
+ * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter
+ * access function, where the 6 parameters are computed as follows:
+ *
+ * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by}
+ *
+ * x4 = h RANGE [0, H-1] SIZE: H
+ * x3 = w RANGE [0, W-1] SIZE: W
+ * x2 = floor(i/{block_by}) RANGE [0, I'/{block_by} -1] SIZE: I'/{block_by}
+ * x1 = o%{interleave_by} RANGE [0, {interleave_by} -1] SIZE: {interleave_by}
+ * x0 = i%{block_by} RANGE [0, {block_by} -1] SIZE: {block_by}
+ * TOTAL SIZE: O' * H * W * I'
+ *
+ * 4D 6D
+ * ----------------- -----------------------------------
+ * value(o, h, w, i) = x5 * H * W * I' * {interleave_by}
+ * + x4 * W * I' * {interleave_by}
+ * + x3 * I' * {interleave_by}
+ * + x2 * {interleave_by} * {block_by}
+ * + x1 * {block_by}
+ * + x0
+ *
+ * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created
+ * for the OHWIo{interleave_by}i{block_by} format is in reality seen
+ * as a 2D tensor, where the number of rows is O'/{interleave_by}
+ * and the number of columns is {interleave_by} * H * W * I'.
+ *
+ * The postfix *_bf16 is for the memory layout needed for the
+ * fast-mode kernels, in which the weights are passed in bfloat16
+ * format.
+ */
+enum class WeightFormat
+{
+ UNSPECIFIED = 0x1,
+ ANY = 0x2,
+ OHWI = 0x100100,
+ OHWIo2 = 0x100200,
+ OHWIo4 = 0x100400,
+ OHWIo8 = 0x100800,
+ OHWIo16 = 0x101000,
+ OHWIo32 = 0x102000,
+ OHWIo64 = 0x104000,
+ OHWIo128 = 0x108000,
+ OHWIo4i2 = 0x200400,
+ OHWIo4i2_bf16 = 0x200410,
+ OHWIo8i2 = 0x200800,
+ OHWIo8i2_bf16 = 0x200810,
+ OHWIo16i2 = 0x201000,
+ OHWIo16i2_bf16 = 0x201010,
+ OHWIo32i2 = 0x202000,
+ OHWIo32i2_bf16 = 0x202010,
+ OHWIo64i2 = 0x204000,
+ OHWIo64i2_bf16 = 0x204010,
+ OHWIo4i4 = 0x400400,
+ OHWIo4i4_bf16 = 0x400410,
+ OHWIo8i4 = 0x400800,
+ OHWIo8i4_bf16 = 0x400810,
+ OHWIo16i4 = 0x401000,
+ OHWIo16i4_bf16 = 0x401010,
+ OHWIo32i4 = 0x402000,
+ OHWIo32i4_bf16 = 0x402010,
+ OHWIo64i4 = 0x404000,
+ OHWIo64i4_bf16 = 0x404010,
+ OHWIo2i8 = 0x800200,
+ OHWIo4i8 = 0x800400,
+ OHWIo8i8 = 0x800800,
+ OHWIo16i8 = 0x801000,
+ OHWIo32i8 = 0x802000,
+ OHWIo64i8 = 0x804000
+};
+
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_CORE_CORETYPES */
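
The six-parameter access function documented above translates directly into a linear offset. A sketch for locating one element of an OHWIo{interleave_by}i{block_by} buffer (I_padded stands for the padded extent I'):

inline size_t ohwi_interleaved_offset(size_t o, size_t h, size_t w, size_t i,  // OHWI coordinates
                                      size_t H, size_t W, size_t I_padded,
                                      size_t interleave_by, size_t block_by)
{
    const size_t x5 = o / interleave_by;
    const size_t x4 = h;
    const size_t x3 = w;
    const size_t x2 = i / block_by;
    const size_t x1 = o % interleave_by;
    const size_t x0 = i % block_by;
    return x5 * H * W * I_padded * interleave_by
         + x4 * W * I_padded * interleave_by
         + x3 * I_padded * interleave_by
         + x2 * interleave_by * block_by
         + x1 * block_by
         + x0;
}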
diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h
index fbaef3a8f0..bb8692d70a 100644
--- a/arm_compute/core/Dimensions.h
+++ b/arm_compute/core/Dimensions.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,7 @@
#include <algorithm>
#include <array>
#include <functional>
+#include <limits>
#include <numeric>
namespace arm_compute
@@ -49,8 +50,7 @@ public:
* @param[in] dims Values to initialize the dimensions.
*/
template <typename... Ts>
- explicit Dimensions(Ts... dims)
- : _id{ { static_cast<T>(dims)... } }, _num_dimensions{ sizeof...(dims) }
+ explicit Dimensions(Ts... dims) : _id{{static_cast<T>(dims)...}}, _num_dimensions{sizeof...(dims)}
{
}
@@ -68,14 +68,19 @@ public:
/** Accessor to set the value of one of the dimensions.
*
- * @param[in] dimension Dimension for which the value is set.
- * @param[in] value Value to be set for the dimension.
+ * @param[in] dimension Dimension for which the value is set.
+ * @param[in] value Value to be set for the dimension.
+ * @param[in] increase_dim_unit (Optional) Set to true if new unit dimensions increase the number of dimensions (e.g. for Coordinates), false otherwise (e.g. for TensorShapes)
*/
- void set(size_t dimension, T value)
+ void set(size_t dimension, T value, bool increase_dim_unit = true)
{
ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions);
- _id[dimension] = value;
- _num_dimensions = std::max(_num_dimensions, dimension + 1);
+ _id[dimension] = value;
+ // Don't increase the number of dimensions if the new dimension is 1
+ if (increase_dim_unit || value != 1)
+ {
+ _num_dimensions = std::max(_num_dimensions, dimension + 1);
+ }
}
/** Alias to access the size of the first dimension */
T x() const
@@ -92,6 +97,21 @@ public:
{
return _id[2];
}
+ /** Increments the given dimension by a step size, avoiding overflows
+ *
+ * @note Precondition: dim < _num_dimensions
+ *
+ * @param[in] dim Dimension to increment.
+ * @param[in] step Step to increment @p dim by.
+ */
+ void increment(size_t dim, T step = 1)
+ {
+ ARM_COMPUTE_ERROR_ON(dim >= _num_dimensions);
+ if ((std::numeric_limits<T>::max() - _id[dim]) >= step)
+ {
+ _id[dim] += step;
+ }
+ }
/** Generic accessor to get the size of any dimension
*
* @note Precondition: dimension < Dimensions::num_max_dimensions
@@ -141,7 +161,7 @@ public:
const size_t last = std::min(_num_dimensions, first + n);
- if(last > (first + 1))
+ if (last > (first + 1))
{
// Collapse dimensions into the first
_id[first] = std::accumulate(&_id[first], &_id[last], 1, std::multiplies<T>());
@@ -175,7 +195,7 @@ public:
void remove(size_t idx)
{
ARM_COMPUTE_ERROR_ON(_num_dimensions < 1);
- if(idx >= _num_dimensions)
+ if (idx >= _num_dimensions)
{
return;
}
@@ -241,7 +261,7 @@ protected:
~Dimensions() = default;
std::array<T, num_max_dimensions> _id;
- size_t _num_dimensions{ 0 };
+ size_t _num_dimensions{0};
};
/** Check that given dimensions are equal.
@@ -268,5 +288,5 @@ inline bool operator!=(const Dimensions<T> &lhs, const Dimensions<T> &rhs)
{
return !(lhs == rhs);
}
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_DIMENSIONS_H*/
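
The two behavioural changes in this hunk are the increase_dim_unit flag on set() and the saturation guard in increment(). A sketch of the new semantics, assuming the Coordinates alias (a Dimensions<int> subclass) from Coordinates.h:

#include "arm_compute/core/Coordinates.h"
#include <limits>

void dimensions_semantics_demo()
{
    using arm_compute::Coordinates;

    Coordinates c(3, 4);                      // num_dimensions() == 2
    c.set(3, 1, /*increase_dim_unit*/ false); // still 2D: trailing unit dims are ignored
    c.set(3, 1, /*increase_dim_unit*/ true);  // now 4D: unit dims count (coordinate semantics)
    c.increment(0, 2);                        // c.x() == 5
    c.set(0, std::numeric_limits<int>::max());
    c.increment(0);                           // no-op: the guard refuses steps that would wrap
}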
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index dd3e8889bc..7a7033805a 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2019, 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,8 +53,7 @@ class Status
{
public:
/** Default Constructor **/
- Status()
- : _code(ErrorCode::OK), _error_description(" ")
+ Status() : _code(ErrorCode::OK), _error_description(" ")
{
}
/** Default Constructor
@@ -101,7 +100,7 @@ public:
/** Throws a runtime exception in case it contains a valid error status */
void throw_if_error() const
{
- if(!bool(*this))
+ if (!bool(*this))
{
internal_throw_on_error();
}
@@ -119,7 +118,7 @@ private:
/** Creates an error containing the error message
*
* @param[in] error_code Error code
- * @param[in] msg Message to display before aborting.
+ * @param[in] msg Message to display before abandoning.
*
* @return status containing the error
*/
@@ -131,7 +130,7 @@ Status create_error(ErrorCode error_code, std::string msg);
* @param[in] func Function in which the error occurred.
* @param[in] file File in which the error occurred.
* @param[in] line Line in which the error occurred.
- * @param[in] msg Message to display before aborting.
+ * @param[in] msg Message to display before abandoning.
*
* @return status containing the error
*/
@@ -141,7 +140,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] err Error status
*/
[[noreturn]] void throw_error(Status err);
-}
+} // namespace arm_compute
/** To avoid unused variables warnings
*
* This is useful if for example a variable is only used
@@ -156,7 +155,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] error_code Error code.
* @param[in] msg Message to encapsulate.
*/
-#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
+#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) \
+ arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
/** Creates an error on location with a given message
*
@@ -164,9 +164,10 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] func Function in which the error occurred.
* @param[in] file File in which the error occurred.
* @param[in] line Line in which the error occurred.
- * @param[in] msg Message to display before aborting.
+ * @param[in] msg Message to display before abandoning.
*/
-#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) arm_compute::create_error_msg(error_code, func, file, line, msg)
+#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) \
+ arm_compute::create_error_msg(error_code, func, file, line, msg)
/** Creates an error on location with a given message. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -178,14 +179,14 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
- do \
- { \
- std::array<char, 512> out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
- snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
- arm_compute::create_error(error_code, std::string(out.data())); \
- } while(false)
+#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
+ do \
+ { \
+ std::array<char, 512> out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+ arm_compute::create_error(error_code, std::string(out.data())); \
+ } while (false)
/** An error is returned with the given description.
*
@@ -195,7 +196,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
do \
{ \
return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Checks if a status contains an error and returns it
*
@@ -204,18 +205,18 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ON_ERROR(status) \
do \
{ \
- if(!bool(status)) \
+ const auto s = status; \
+ if (!bool(s)) \
{ \
- return status; \
+ return s; \
} \
- } while(false)
+ } while (false)
/** Checks if an error value is valid if not throws an exception with the error
*
* @param[in] error Error value to check.
*/
-#define ARM_COMPUTE_THROW_ON_ERROR(error) \
- error.throw_if_error();
+#define ARM_COMPUTE_THROW_ON_ERROR(error) error.throw_if_error();
/** If the condition is true, an error is returned. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -227,28 +228,29 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(cond, msg, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
- std::array<char, 512> out{ 0 }; \
+ std::array<char, 512> out{0}; \
int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", __func__, __FILE__, __LINE__); \
snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
return arm_compute::create_error(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, an error is returned
*
* @param[in] cond Condition to evaluate.
* @param[in] msg Error description message
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
- do \
- { \
- if(cond) \
- { \
- return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, msg); \
- } \
- } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
+ do \
+ { \
+ if (cond) \
+ { \
+ return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, \
+ msg); \
+ } \
+ } while (false)
/** If the condition is true, an error is thrown. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -260,17 +262,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
- do \
- { \
- if(cond) \
- { \
- std::array<char, 512> out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
- snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
- return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
- } \
- } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
+ do \
+ { \
+ if (cond) \
+ { \
+ std::array<char, 512> out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+ return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
+ } \
+ } while (false)
/** If the condition is true, an error is thrown.
*
@@ -283,18 +285,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(cond, func, file, line, msg) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
return arm_compute::create_error_msg(ErrorCode::RUNTIME_ERROR, func, file, line, msg); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, an error is returned
*
* @param[in] cond Condition to evaluate
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON(cond) \
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_RETURN_ERROR_ON(cond) ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
/** If the condition is true, an error is returned
*
@@ -313,11 +314,12 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] line Line in which the error occurred.
* @param[in] msg Message to display.
*/
-#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
- do \
- { \
- arm_compute::throw_error(arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
- } while(false)
+#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
+ do \
+ { \
+ arm_compute::throw_error( \
+ arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
+ } while (false)
/** Print the given message then throw an std::runtime_error. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -331,11 +333,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, ...) \
do \
{ \
- std::array<char, 512> out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ std::array<char, 512> out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
arm_compute::throw_error(arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data()))); \
- } while(false)
+ } while (false)
/** Print the given message then throw an std::runtime_error. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -360,7 +362,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
+#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) \
+ ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
/** Print the given message then throw an std::runtime_error.
*
@@ -379,11 +382,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_EXIT_ON_MSG(cond, msg) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR(msg); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, the given message is printed and program exits. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -395,27 +398,25 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR_VAR(msg, __VA_ARGS__); \
} \
- } while(false)
+ } while (false)
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Checks if a status value is valid if not throws an exception with the error
*
* @param[in] status Status value to check.
*/
-#define ARM_COMPUTE_ERROR_THROW_ON(status) \
- status.throw_if_error()
+#define ARM_COMPUTE_ERROR_THROW_ON(status) status.throw_if_error()
/** If the condition is true, the given message is printed and an exception is thrown
*
* @param[in] cond Condition to evaluate.
* @param[in] msg Message to display.
*/
-#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) \
- ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
+#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
/** If the condition is true, the given message is printed and an exception is thrown. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -424,8 +425,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) \
- ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
+#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
/** If the condition is true, the given message is printed and an exception is thrown.
*
@@ -438,11 +438,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, __VA_ARGS__); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, the given message is printed and an exception is thrown, otherwise value is returned
*
@@ -463,8 +463,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
*
* @param[in] cond Condition to evaluate.
*/
-#define ARM_COMPUTE_ERROR_ON(cond) \
- ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_ERROR_ON(cond) ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
/** If the condition is true then an error message is printed and an exception thrown
*
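
Worth noting in the hunk above: ARM_COMPUTE_RETURN_ON_ERROR now binds its argument to a local first (const auto s = status;), so the expression is evaluated exactly once and a function call can be passed directly. A sketch of typical use in a validate routine (sub_validate is a hypothetical helper):

#include "arm_compute/core/Error.h"

arm_compute::Status validate(const arm_compute::ITensorInfo *input)
{
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input == nullptr, "input must not be nullptr");
    // The status expression is evaluated exactly once, even though the macro
    // both tests it and returns it:
    ARM_COMPUTE_RETURN_ON_ERROR(sub_validate(*input)); // sub_validate: hypothetical
    return arm_compute::Status{};
}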
diff --git a/arm_compute/core/GLES_COMPUTE/GCCoreRuntimeContext.h b/arm_compute/core/GLES_COMPUTE/GCCoreRuntimeContext.h
deleted file mode 100644
index 9706c9b3a6..0000000000
--- a/arm_compute/core/GLES_COMPUTE/GCCoreRuntimeContext.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCCORERUNTIME_CONTEXT_H
-#define ARM_COMPUTE_GCCORERUNTIME_CONTEXT_H
-
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class GCKernelLibrary;
-
-/** Core runtime context for OpenGL ES */
-class GCCoreRuntimeContext final
-{
-public:
- /** Legacy constructor */
- GCCoreRuntimeContext();
-
- /** Constructor */
- GCCoreRuntimeContext(GCKernelLibrary *kernel_lib);
- /** Destructor */
- ~GCCoreRuntimeContext() = default;
- /** Default copy constructor */
- GCCoreRuntimeContext(const GCCoreRuntimeContext &) = default;
- /** Default move constructor */
- GCCoreRuntimeContext(GCCoreRuntimeContext &&) = default;
- /** Default copy assignment */
- GCCoreRuntimeContext &operator=(const GCCoreRuntimeContext &) = default;
- /** Default move assignment operator */
- GCCoreRuntimeContext &operator=(GCCoreRuntimeContext &&) = default;
- /** Kernel Library accessor
- *
- * @return The kernel library instance used by the core context
- */
- GCKernelLibrary *kernel_library() const;
-
-private:
- GCKernelLibrary *_kernel_lib{ nullptr };
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_GCCORERUNTIME_CONTEXT_H */
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
deleted file mode 100644
index 0f6daf786b..0000000000
--- a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCKERNELLIBRARY_H
-#define ARM_COMPUTE_GCKERNELLIBRARY_H
-
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Utils.h"
-
-#include <map>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace arm_compute
-{
-/** GCProgram class */
-class GCProgram final
-{
-public:
- /** Default constructor. */
- GCProgram();
- /** Construct program from source file.
- *
- * @param[in] name Program name.
- * @param[in] source Program source.
- */
- GCProgram(std::string name, std::string source);
- /** Default Copy Constructor. */
- GCProgram(const GCProgram &) = default;
- /** Default Move Constructor. */
- GCProgram(GCProgram &&) = default;
- /** Default copy assignment operator */
- GCProgram &operator=(const GCProgram &) = default;
- /** Default move assignment operator */
- GCProgram &operator=(GCProgram &&) = default;
- /** Returns program name.
- *
- * @return Program's name.
- */
- std::string name() const
- {
- return _name;
- }
- /** Link program.
- *
- * @param[in] shader Shader used to link program.
- *
- * @return Linked program id.
- */
- GLuint link_program(GLuint shader);
- /** Compile shader.
- *
- * @param[in] build_options Shader build options.
- *
- * @return GLES shader object.
- */
- GLuint compile_shader(const std::string &build_options);
-
-private:
- std::string _name; /**< Program name. */
- std::string _source; /**< Source code for the program. */
-};
-
-/** GCKernel class */
-class GCKernel final
-{
-public:
- /** Default Constructor. */
- GCKernel();
- /** Default Copy Constructor. */
- GCKernel(const GCKernel &) = default;
- /** Default Move Constructor. */
- GCKernel(GCKernel &&) = default;
- /** Default copy assignment operator */
- GCKernel &operator=(const GCKernel &) = default;
- /** Default move assignment operator */
- GCKernel &operator=(GCKernel &&) = default;
- /** Constructor.
- *
- * @param[in] name Kernel name.
- * @param[in] program Built program.
- */
- GCKernel(std::string name, GLuint program);
- /** Destructor.
- */
- ~GCKernel();
- /** Returns kernel name.
- *
- * @return Kernel's name.
- */
- std::string name() const
- {
- return _name;
- }
- /** Get program id.
- *
- * @return program id.
- */
- GLuint get_program() const
- {
- return _program;
- }
- /** Use current program.
- */
- void use();
- /** Unuse current program.
- */
- void unuse();
- /** Set argument value at index of shader params.
- *
- * @param[in] idx Index in shader params.
- * @param[in] value Argument value to be set.
- */
- template <class T>
- void set_argument(unsigned int idx, T value)
- {
- if(idx >= _shader_arguments.size())
- {
- _shader_arguments.resize(idx + 1, 0);
- }
-
- unsigned int *p = reinterpret_cast<unsigned int *>(&value);
- _shader_arguments[idx] = *p;
- }
- /** Clear shader arguments.
- *
- */
- void clear_arguments()
- {
- _shader_arguments.clear();
- }
- /** Set shader params binding point.
- *
- * @param[in] binding Shader params binding point.
- */
- void set_shader_params_binding_point(unsigned int binding)
- {
- _shader_params_binding_point = binding;
- }
- /** Update shader params.
- *
- */
- void update_shader_params();
- /** Clean up program and ubo.
- *
- */
- void cleanup();
-
-private:
- std::string _name; /**< Kernel name */
- GLuint _program; /**< Linked program id */
- std::vector<unsigned int> _shader_arguments; /**< Store all the values of the shader arguments */
- GLuint _shader_params_ubo_name; /**< Uniform buffer object name for shader parameters */
- GLuint _shader_params_binding_point; /**< The binding point of the uniform block for shader parameters */
- GLuint _shader_params_index; /**< The index of the uniform block */
- GLint _shader_params_size; /**< The uniform block data size in the shader */
- static constexpr const char *_shader_params_name = "shader_params"; /**< The uniform block name in the shader */
-};
-
-/** GCKernelLibrary class */
-class GCKernelLibrary final
-{
- using StringSet = std::set<std::string>;
-
-public:
- /** Default Constructor. */
- GCKernelLibrary();
- /** Default Destructor */
- ~GCKernelLibrary();
- /** Prevent instances of this class from being copied */
- GCKernelLibrary(const GCKernelLibrary &) = delete;
- /** Prevent instances of this class from being copied */
- const GCKernelLibrary &operator=(const GCKernelLibrary &) = delete;
- /** Get the static instance of @ref GCKernelLibrary.
- * This method has been deprecated and will be removed in the next release.
- * @return The static instance of GCKernelLibrary.
- */
- static GCKernelLibrary &get();
- /** Initialises the kernel library.
- *
- * @param[in] shader_path (Optional) Path of the directory from which shader sources are loaded.
- * @param[in] dpy (Optional) EGLdisplay set by external application.
- * @param[in] ctx (Optional) EGLContext set by external application.
- */
- void init(std::string shader_path = "./", EGLDisplay dpy = EGL_NO_DISPLAY, EGLContext ctx = EGL_NO_CONTEXT);
- /** Sets the path that the shaders reside in.
- *
- * @param[in] shader_path Path of the shader.
- */
- void set_shader_path(const std::string &shader_path);
- /** Sets display and context to create kernel.
- *
- * @param[in] dpy EGLdisplay set by external application.
- * @param[in] ctx EGLContext set by external application.
- */
- void set_context(EGLDisplay dpy, EGLContext ctx);
- /** Creates a kernel from the kernel library.
- *
- * @param[in] shader_name Shader name.
- * @param[in] build_options_set Shader build options as a set.
- *
- * @return The created kernel.
- */
- GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set = {}) const;
- /** Serializes and saves programs to a binary. */
- void save_binary();
- /** Load serialized binary with all the programs. */
- void load_binary();
- /** Set up a dummy FBO to work around an issue on Galaxy S8. */
- void setup_dummy_fbo();
-
-private:
- /** Preprocess GLES shader
- *
- * @param[in] shader_source Source code of the shader to preprocess.
- *
- * @return Preprocessed GLES shader object.
- */
- std::string preprocess_shader(const std::string &shader_source) const;
- /** Load program and its dependencies.
- *
- * @param[in] program_name Name of the program to load.
- */
- const GCProgram &load_program(const std::string &program_name) const;
- /** Concatenates contents of a set into a single string.
- *
- * @param[in] s Input set to concatenate.
- *
- * @return Concatenated string.
- */
- std::string stringify_set(const StringSet &s) const;
-
- EGLDisplay _display; /**< Underlying EGL Display. */
- EGLContext _context; /**< Underlying EGL Context. */
- GLuint _frame_buffer; /**< Dummy fbo */
- GLuint _tex_rt; /**< Dummy texture for render target */
- std::string _shader_path; /**< Path to the shaders folder. */
- mutable std::map<std::string, const GCProgram> _programs_map; /**< Map with all already loaded program data. */
- mutable std::map<std::string, const GCKernel> _built_programs_map; /**< Map with all already built program data. */
- static const std::map<std::string, std::string> _shader_program_map; /**< Map that associates kernel names with programs. */
- static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
- Used for compile-time shader inclusion. */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_GCKERNELLIBRARY_H */
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h
deleted file mode 100644
index a1537ec152..0000000000
--- a/arm_compute/core/GLES_COMPUTE/GCKernels.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCKERNELS_H
-#define ARM_COMPUTE_GCKERNELS_H
-
-/* Header regrouping all the GLES compute kernels */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h"
-
-#endif /* ARM_COMPUTE_GCKERNELS_H */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
deleted file mode 100644
index 7b2aad7cec..0000000000
--- a/arm_compute/core/GLES_COMPUTE/IGCKernel.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IGCKERNEL_H
-#define ARM_COMPUTE_IGCKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/GPUTarget.h"
-
-#include "arm_compute/core/IKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-class Window;
-
-/** Common interface for all the GLES kernels */
-class IGCKernel : public IKernel
-{
-public:
- /** Constructor */
- IGCKernel();
- /** Returns a reference to the GLES kernel of this object.
- *
- * @return A reference to the GLES kernel of this object.
- */
- GCKernel &kernel();
-
- /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in] idx Index at which to start adding the tensor's arguments. Input and output tensors have separate indices; multiple indices start from 1, a single index has to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] binding_point Tensor's binding point in this kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
-
- /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in] idx Index at which to start adding the tensor's arguments. Input and output tensors have separate indices; multiple indices start from 1, a single index has to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] binding_point Tensor's binding point in this kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
-
- /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in] idx Index at which to start adding the tensor's arguments. Input and output tensors have separate indices; multiple indices start from 1, a single index has to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] binding_point Tensor's binding point in this kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
-
- /** Returns the number of arguments enqueued per 1D tensor object.
- *
- * @return The number of arguments enqueued per 1D tensor object.
- */
- unsigned int num_arguments_per_1D_tensor() const;
- /** Returns the number of arguments enqueued per 2D tensor object.
- *
- * @return The number of arguments enqueues per 2D tensor object.
- */
- unsigned int num_arguments_per_2D_tensor() const;
- /** Returns the number of arguments enqueued per 3D tensor object.
- *
- * @return The number of arguments enqueues per 3D tensor object.
- */
- unsigned int num_arguments_per_3D_tensor() const;
- /** Enqueue the OpenGL ES shader to process the given window
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- virtual void run(const Window &window) = 0;
-
- /** Set the Local-Workgroup-Size hint
- *
- * @note This method should be called after the configuration of the kernel
- *
- * @param[in] lws_hint Local-Workgroup-Size to use
- */
- void set_lws_hint(gles::NDRange &lws_hint)
- {
- _lws_hint = lws_hint;
- }
-
- /** Set the targeted GPU architecture
- *
- * @param[in] target The targeted GPU architecture
- */
- void set_target(GPUTarget target)
- {
- _target = target;
- }
-
- /** Get the targeted GPU architecture
- *
- * @return The targeted GPU architecture.
- */
- GPUTarget get_target() const
- {
- return _target;
- }
-
-private:
- /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in] idx Index at which to start adding the tensor's arguments. Input and output tensors have separate indices; multiple indices start from 1, a single index has to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] binding_point Tensor's binding point in this kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- template <unsigned int dimension_size>
- void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
-
- /** Returns the number of arguments enqueued per tensor object.
- *
- * @return The number of arguments enqueued per tensor object.
- */
- template <unsigned int dimension_size>
- unsigned int num_arguments_per_tensor() const;
-
-protected:
- GCKernel _kernel; /**< GLES kernel to run */
- gles::NDRange _lws_hint; /**< Local workgroup size hint for the GLES kernel */
- GPUTarget _target; /**< The targeted GPU */
-};
-
-/** Add the kernel to the command queue with the given window.
- *
- * @note Depending on the size of the window, this might translate into several jobs being enqueued.
- *
- * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
- *
- * @param[in] kernel Kernel to enqueue
- * @param[in] window Window the kernel has to process.
- * @param[in] lws Local workgroup size requested, by default (1, 1, 1)
- *
- * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
- */
-void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws = gles::NDRange(1U, 1U, 1U));
-}
-#endif /*ARM_COMPUTE_IGCKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h
deleted file mode 100644
index ae8fd40888..0000000000
--- a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IGCSIMPLE2DKERNEL_H
-#define ARM_COMPUTE_IGCSIMPLE2DKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */
-class IGCSimple2DKernel : public IGCSimpleKernel
-{
-public:
- // Inherited methods overridden:
- void run(const Window &window) override;
-};
-}
-#endif /*ARM_COMPUTE_IGCSIMPLE2DKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h
deleted file mode 100644
index 40a21ee147..0000000000
--- a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IGCSIMPLE3DKERNEL_H
-#define ARM_COMPUTE_IGCSIMPLE3DKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for simple GLES kernels having 1 tensor input and 1 tensor output.
- * Both input tensor and output tensor must have at least 3 dimensions.
- */
-class IGCSimple3DKernel : public IGCSimple2DKernel
-{
-public:
- // Inherited methods overridden:
- void run(const Window &window) override;
-};
-}
-#endif /*ARM_COMPUTE_IGCSIMPLE3DKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
deleted file mode 100644
index c0f561ab5d..0000000000
--- a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IGCSIMPLEKERNEL_H
-#define ARM_COMPUTE_IGCSIMPLEKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-
-namespace arm_compute
-{
-/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output */
-class IGCSimpleKernel : public IGCKernel
-{
-public:
- /** Constructor. */
- IGCSimpleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- IGCSimpleKernel(const IGCSimpleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- IGCSimpleKernel &operator=(const IGCSimpleKernel &) = delete;
- /** Allow instances of this class to be moved */
- IGCSimpleKernel(IGCSimpleKernel &&) = default;
- /** Allow instances of this class to be moved */
- IGCSimpleKernel &operator=(IGCSimpleKernel &&) = default;
- /** Default destructor */
- ~IGCSimpleKernel() = default;
-
- /** Configure the kernel
- *
- * @param[in] input Source tensor.
- * @param[out] output Destination tensor.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- void configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
-
-protected:
- const IGCTensor *_input;
- IGCTensor *_output;
-};
-}
-
-#endif /*ARM_COMPUTE_IGCSIMPLEKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCTensor.h b/arm_compute/core/GLES_COMPUTE/IGCTensor.h
deleted file mode 100644
index c382095846..0000000000
--- a/arm_compute/core/GLES_COMPUTE/IGCTensor.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IGCTENSOR_H
-#define ARM_COMPUTE_IGCTENSOR_H
-
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/ITensor.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-/** Interface for GLES Compute tensor */
-class IGCTensor : public ITensor
-{
-public:
- /** Default constructor. */
- IGCTensor();
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- IGCTensor(const IGCTensor &) = delete;
-
- /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
- IGCTensor &operator=(const IGCTensor &) = delete;
-
- /** Allow instances of this class to be moved */
- IGCTensor(IGCTensor &&) = default;
-
- /** Allow instances of this class to be moved */
- IGCTensor &operator=(IGCTensor &&) = default;
-
- /** Virtual destructor */
- virtual ~IGCTensor() = default;
-
- /** Map on an allocated buffer.
- *
- * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- */
- void map(bool blocking = true);
- /** Unmap an allocated and mapped buffer.
- */
- void unmap();
- /** Clear the contents of the tensor synchronously.
- */
- void clear();
-
- // Inherited methods overridden:
- uint8_t *buffer() const override;
- /** Interface to be implemented by the child class to return the tensor's GLES compute buffer id.
- *
- * @return A SSBO buffer id.
- */
- virtual GLuint gc_buffer() const = 0;
-
- /** Flag indicating whether the tensor has been left aligned by a kernel and therefore needs shifting.
- *
- * @return True if the tensor is left aligned.
- */
- bool needs_shifting() const;
- /** Set the flag indicating whether or not a tensor needs shifting.
- *
- * @param[in] needs_shifting Indicates if the tensor is left aligned or not.
- *
- */
- void set_needs_shifting(bool needs_shifting);
-
-protected:
- /** Method to be implemented by the child class to map the SSBO.
- *
- * @param[in] blocking If true, then the mapping will be ready to use by the time
- * this method returns, else it is the caller's responsibility
- * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- */
- virtual uint8_t *do_map(bool blocking) = 0;
- /** Method to be implemented by the child class to unmap the SSBO.
- *
- * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
- * the memory is accessed by the device.
- */
- virtual void do_unmap() = 0;
-
-private:
- uint8_t *_mapping;
- bool _needs_shifting;
-};
-
-/** Interface for GLES Compute image */
-using IGCImage = IGCTensor;
-}
-#endif /*ARM_COMPUTE_IGCTENSOR_H */
diff --git a/arm_compute/core/GLES_COMPUTE/OpenGLES.h b/arm_compute/core/GLES_COMPUTE/OpenGLES.h
deleted file mode 100644
index 445443602d..0000000000
--- a/arm_compute/core/GLES_COMPUTE/OpenGLES.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_OPENGLES_H
-#define ARM_COMPUTE_OPENGLES_H
-
-#include "arm_compute/core/Log.h"
-
-#include <EGL/egl.h>
-#include <EGL/eglext.h>
-#include <EGL/eglplatform.h>
-#include <GLES3/gl31.h>
-#include <GLES3/gl3ext.h>
-#include <cstddef>
-
-#ifdef ARM_COMPUTE_DEBUG_ENABLED
-#define ARM_COMPUTE_GL_CHECK(x) \
- x; \
- { \
- GLenum error = glGetError(); \
- if(error != GL_NO_ERROR) \
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("glGetError() = %i (0x%.8x)\n", error, error); \
- }
-#else /* ARM_COMPUTE_DEBUG_ENABLED */
-#define ARM_COMPUTE_GL_CHECK(x) x
-#endif /* ARM_COMPUTE_DEBUG_ENABLED */
-
-namespace arm_compute
-{
-namespace gles
-{
-/** Class interface for specifying NDRange values. */
-class NDRange
-{
-private:
- size_t _sizes[3];
- size_t _dimensions;
-
-public:
- /** Default constructor - resulting range has zero dimensions. */
- NDRange()
- : _dimensions(0)
- {
- _sizes[0] = 0;
- _sizes[1] = 0;
- _sizes[2] = 0;
- }
-
- /** Constructs one-dimensional range.
- *
- * @param[in] size0 Size of the first dimension.
- */
- NDRange(size_t size0)
- : _dimensions(1)
- {
- _sizes[0] = size0;
- _sizes[1] = 1;
- _sizes[2] = 1;
- }
-
- /** Constructs two-dimensional range.
- *
- * @param[in] size0 Size of the first dimension.
- * @param[in] size1 Size of the second dimension.
- */
- NDRange(size_t size0, size_t size1)
- : _dimensions(2)
- {
- _sizes[0] = size0;
- _sizes[1] = size1;
- _sizes[2] = 1;
- }
-
- /** Constructs three-dimensional range.
- *
- * @param[in] size0 Size of the first dimension.
- * @param[in] size1 Size of the second dimension.
- * @param[in] size2 Size of the third dimension.
- */
- NDRange(size_t size0, size_t size1, size_t size2)
- : _dimensions(3)
- {
- _sizes[0] = size0;
- _sizes[1] = size1;
- _sizes[2] = size2;
- }
-
- /** Conversion operator to const size_t *.
- *
- * @returns A pointer to the size of the first dimension.
- */
- operator const size_t *() const
- {
- return _sizes;
- }
-
- /** Queries the number of dimensions in the range.
- *
- * @returns The number of dimensions.
- */
- size_t dimensions() const
- {
- return _dimensions;
- }
-
- /** Returns the size of the object in bytes based on the runtime number of dimensions
- *
- * @returns The size of the object in bytes.
- */
- size_t size() const
- {
- return _dimensions * sizeof(size_t);
- }
-
- /** Returns the array of sizes for each dimension.
- *
- * @returns The sizes array
- */
- size_t *get()
- {
- return _sizes;
- }
-
- /** Returns the array of sizes for each dimension.
- *
- * @returns The sizes array
- */
- const size_t *get() const
- {
- return _sizes;
- }
-};
-
-static const NDRange NullRange;
-static const NDRange Range_128_1 = NDRange(128, 1);
-} // namespace gles
-
-/** Check if the OpenGL ES 3.1 API is available at runtime.
- *
- * @returns true if the OpenGL ES 3.1 API is available.
- */
-bool opengles31_is_available();
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_OPENGLES_H */
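
For reference, the removed gles::NDRange mirrors the shape of cl::NDRange. A minimal usage sketch of the class as declared above; the dispatch call in the comment is illustrative, not part of this header:

#include <cstddef>

// Minimal usage sketch for the gles::NDRange declared above.
void dispatch_example()
{
    arm_compute::gles::NDRange lws(4, 4, 1); // three-dimensional local work-group size
    const size_t *sizes = lws.get();         // pointer to the sizes array
    const size_t  dims  = lws.dimensions();  // == 3
    // e.g. glDispatchCompute(sizes[0], sizes[1], sizes[2]);
    (void)sizes;
    (void)dims;
}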
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
deleted file mode 100644
index d55f98fa66..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H
-#define ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the absolute difference kernel.
- *
- * Absolute difference is computed by:
- * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
- */
-class GCAbsoluteDifferenceKernel : public IGCKernel
-{
-public:
- /** Default constructor. */
- GCAbsoluteDifferenceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default;
- /** Default destructor */
- ~GCAbsoluteDifferenceKernel() = default;
-
- /** Set the inputs and output images.
- *
- * @param[in] input1 Source tensor. Data types supported: U8
- * @param[in] input2 Source tensor. Data types supported: U8
- * @param[out] output Destination tensor. Data types supported: U8
- */
- void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input1; /**< Source tensor 1. */
- const IGCTensor *_input2; /**< Source tensor 2. */
- IGCTensor *_output; /**< Destination tensor. */
-};
-}
-#endif /* ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H */
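
For reference, a scalar sketch of the operation documented above; the flat U8 buffers are an illustrative simplification of the tensor windows the kernel actually iterates:

#include <cstddef>
#include <cstdint>
#include <cstdlib>

// Scalar reference for output(x,y) = |input1(x,y) - input2(x,y)| on U8 data.
void absdiff_reference(const uint8_t *in1, const uint8_t *in2, uint8_t *out, size_t count)
{
    for(size_t i = 0; i < count; ++i)
    {
        const int diff = static_cast<int>(in1[i]) - static_cast<int>(in2[i]);
        out[i]         = static_cast<uint8_t>(std::abs(diff));
    }
}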
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
deleted file mode 100644
index 65e018a50a..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H
-#define ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class IGCTensor;
-class GCCoreRuntimeContext;
-
-/** Interface for the activation layer kernel. */
-class GCActivationLayerKernel : public IGCKernel
-{
-public:
- /** Default constructor
- *
- * @param[in, out] ctx Core context to use
- */
- explicit GCActivationLayerKernel(GCCoreRuntimeContext *ctx = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCActivationLayerKernel(const GCActivationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCActivationLayerKernel &operator=(const GCActivationLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCActivationLayerKernel(GCActivationLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCActivationLayerKernel &operator=(GCActivationLayerKernel &&) = default;
- /** Default destructor */
- ~GCActivationLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type should match the input data type.
- * @param[in] act_info Activation layer information.
- */
- void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- IGCTensor *_input;
- IGCTensor *_output;
- GCCoreRuntimeContext *_ctx;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h
deleted file mode 100644
index 7e8159c638..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCARITHMETICADDITIONKERNEL_H
-#define ARM_COMPUTE_GCARITHMETICADDITIONKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the arithmetic addition kernel
- *
- * Arithmetic addition is computed by:
- * @f[ output(x,y) = input1(x,y) + input2(x,y) @f]
- */
-class GCArithmeticAdditionKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCArithmeticAdditionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCArithmeticAdditionKernel(const GCArithmeticAdditionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCArithmeticAdditionKernel &operator=(const GCArithmeticAdditionKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCArithmeticAdditionKernel(GCArithmeticAdditionKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCArithmeticAdditionKernel &operator=(GCArithmeticAdditionKernel &&) = default;
- /** Default destructor */
- ~GCArithmeticAdditionKernel() = default;
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in] input1 First tensor input. Data types supported: F16.
- * @param[in] input2 Second tensor input. Data types supported: F16.
- * @param[out] output Output tensor. Data types supported: F16.
- * @param[in] policy Policy to use to handle overflow.
- */
- void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, ConvertPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref GCArithmeticAdditionKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: F16.
- * @param[in] input2 Second tensor input info. Data types supported: F16.
- * @param[in] output Output tensor info. Data types supported: F16.
- * @param[in] policy Policy to use to handle overflow.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input1; /**< Source tensor 1 */
- const IGCTensor *_input2; /**< Source tensor 2 */
- IGCTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_GCARITHMETICADDITIONKERNEL_H */
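
The ConvertPolicy parameter above selects how overflow is handled. A scalar illustration of the two policies; int16_t is used purely to make the WRAP/SATURATE distinction visible, since the kernel itself is documented as F16-only:

#include <algorithm>
#include <cstdint>

// Mirrors the semantics of arm_compute::ConvertPolicy for this sketch.
enum class ConvertPolicy { WRAP, SATURATE };

int16_t add_with_policy(int16_t a, int16_t b, ConvertPolicy policy)
{
    const int32_t sum = static_cast<int32_t>(a) + static_cast<int32_t>(b);
    if(policy == ConvertPolicy::SATURATE)
    {
        // Clamp to the representable range of the destination type.
        return static_cast<int16_t>(std::min<int32_t>(std::max<int32_t>(sum, INT16_MIN), INT16_MAX));
    }
    return static_cast<int16_t>(sum); // WRAP: keep the low 16 bits of the result
}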
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
deleted file mode 100644
index eb7a99c59e..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the BatchNormalization layer kernel.
- */
-class GCBatchNormalizationLayerKernel : public IGCKernel
-{
-public:
- /** Constructor */
- GCBatchNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCBatchNormalizationLayerKernel(const GCBatchNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~GCBatchNormalizationLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division by zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- */
- void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta = nullptr, const IGCTensor *gamma = nullptr, float epsilon = 0.001f,
- ActivationLayerInfo act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref GCBatchNormalizationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division by zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
- const IGCTensor *_mean;
- const IGCTensor *_var;
- const IGCTensor *_beta;
- const IGCTensor *_gamma;
- float _epsilon;
-};
-}
-#endif /*ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H */
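
For reference, the per-element transform this kernel applies within each feature map, using the defaults stated above (beta = 0 and gamma = 1 when the optional tensors are not provided):

#include <cmath>

// Scalar reference: out = gamma * (x - mean) / sqrt(var + epsilon) + beta.
float batch_norm_reference(float x, float mean, float var, float beta, float gamma, float epsilon)
{
    return gamma * (x - mean) / std::sqrt(var + epsilon) + beta;
}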
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
deleted file mode 100644
index d96fb56771..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_GCCOL2IMKERNEL_H
-#define ARM_COMPUTE_GCCOL2IMKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the col2im reshaping kernel.
- *
- * Rearranges each matrix column into image blocks. It's the inverse operation of @ref GCIm2ColKernel.
- *
- * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
- *
- * @f[
- * \left( \begin{array}{ccccccccc}
- * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccc}
- * a0 & a1 & a2 \\
- * a3 & a4 & a5 \\
- * a6 & a7 & a8 \\
- * \end{array} \right)
- * @f]
- */
-class GCCol2ImKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCCol2ImKernel();
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCCol2ImKernel(const GCCol2ImKernel &) = delete;
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCCol2ImKernel &operator=(const GCCol2ImKernel &) = delete;
-
- /** Allow instances of this class to be moved */
- GCCol2ImKernel(GCCol2ImKernel &&) = default;
-
- /** Allow instances of this class to be moved */
- GCCol2ImKernel &operator=(GCCol2ImKernel &&) = default;
-
- /** Default destructor */
- ~GCCol2ImKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Data types supported: F16/F32
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- */
- void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> convolved_dims);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
- std::pair<unsigned int, unsigned int> _convolved_dims;
-};
-}
-
-#endif /*ARM_COMPUTE_GCCOL2IMKERNEL_H */
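
For reference, a scalar sketch of the rearrangement illustrated above for a single column; the row-major layout and single output block are illustrative simplifications:

#include <cstddef>

// Rearranges one column of width * height elements into a width x height
// image block, matching the 9-element -> 3x3 example above. In row-major
// storage the rearrangement is an element-order-preserving reshape.
void col2im_reference(const float *column, float *image, size_t width, size_t height)
{
    for(size_t y = 0; y < height; ++y)
    {
        for(size_t x = 0; x < width; ++x)
        {
            image[y * width + x] = column[y * width + x];
        }
    }
}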
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
deleted file mode 100644
index 9c7754947a..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the depth concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class GCDepthConcatenateLayerKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDepthConcatenateLayerKernel(const GCDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDepthConcatenateLayerKernel &operator=(const GCDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCDepthConcatenateLayerKernel(GCDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCDepthConcatenateLayerKernel &operator=(GCDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~GCDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note The output tensor's two lowest dimensions can't be smaller than the input's.
- * @note The gaps between the two lowest dimensions of input and output must be divisible by 2.
- *
- */
- void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
- int _depth_offset;
-};
-}
-#endif /* ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H */
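
An illustrative sketch of the concatenation along Z described above. The centred XY placement is inferred from the note that the gaps between the two lowest dimensions must be divisible by 2; dense row-major 3D layouts are an assumption made for illustration:

#include <cstddef>

void depth_concat_reference(const float *in, size_t in_w, size_t in_h, size_t in_d,
                            float *out, size_t out_w, size_t out_h, size_t depth_offset)
{
    const size_t x0 = (out_w - in_w) / 2; // centred placement in X
    const size_t y0 = (out_h - in_h) / 2; // centred placement in Y
    for(size_t z = 0; z < in_d; ++z)
    {
        for(size_t y = 0; y < in_h; ++y)
        {
            for(size_t x = 0; x < in_w; ++x)
            {
                out[((z + depth_offset) * out_h + y + y0) * out_w + x + x0] = in[(z * in_h + y) * in_w + x];
            }
        }
    }
}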
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
deleted file mode 100644
index 8faa54a205..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCDEPTHWISECONVOLUTIONKERNEL3x3_H
-#define ARM_COMPUTE_GCDEPTHWISECONVOLUTIONKERNEL3x3_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
- */
-class GCDepthwiseConvolutionLayer3x3Kernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCDepthwiseConvolutionLayer3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDepthwiseConvolutionLayer3x3Kernel(const GCDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDepthwiseConvolutionLayer3x3Kernel &operator=(const GCDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Default Move Constructor. */
- GCDepthwiseConvolutionLayer3x3Kernel(GCDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Default move assignment operator */
- GCDepthwiseConvolutionLayer3x3Kernel &operator=(GCDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] input Source tensor. Data type supported: F16.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- */
- void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size;
- const IGCTensor *_input;
- IGCTensor *_output;
- const IGCTensor *_weights;
- const IGCTensor *_biases;
- unsigned int _conv_stride_x;
- unsigned int _conv_stride_y;
- unsigned int _conv_pad_left;
- unsigned int _conv_pad_top;
- gles::NDRange _lws;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_GCDEPTHWISECONVOLUTIONKERNEL3x3_H */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
deleted file mode 100644
index 43f94f8662..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H
-#define ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the direct convolution kernel.
- */
-template <unsigned int kernel_size>
-class GCDirectConvolutionLayerKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCDirectConvolutionLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDirectConvolutionLayerKernel(const GCDirectConvolutionLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDirectConvolutionLayerKernel &operator=(const GCDirectConvolutionLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCDirectConvolutionLayerKernel(GCDirectConvolutionLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCDirectConvolutionLayerKernel &operator=(GCDirectConvolutionLayerKernel &&) = default;
- /** Default destructor */
- ~GCDirectConvolutionLayerKernel() = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32
- * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias Biases tensor. Shared bias supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- const IGCTensor *_bias;
- const IGCTensor *_weights;
- IGCTensor *_output;
- BorderSize _border_size;
- int _conv_stride_x;
- int _conv_stride_y;
- int _conv_pad_x;
- int _conv_pad_y;
- gles::NDRange _lws;
-};
-
-/** Interface for the 1x1 direct convolution kernel */
-using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>;
-/** Interface for the 3x3 direct convolution kernel */
-using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>;
-/** Interface for the 5x5 direct convolution kernel */
-using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>;
-}
-#endif /*ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H */
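
For reference, a naive scalar sketch of the direct convolution that configure() above describes (single batch, square kernel, zero padding); the flat NCHW-style layout and bounds handling are illustrative:

#include <cstddef>

void direct_conv_reference(const float *in, size_t in_w, size_t in_h, size_t ifm,
                           const float *weights, size_t k, size_t ofm, const float *bias,
                           float *out, size_t out_w, size_t out_h, size_t stride, size_t pad)
{
    for(size_t o = 0; o < ofm; ++o)
    {
        for(size_t y = 0; y < out_h; ++y)
        {
            for(size_t x = 0; x < out_w; ++x)
            {
                float acc = bias[o];
                for(size_t c = 0; c < ifm; ++c)
                {
                    for(size_t ky = 0; ky < k; ++ky)
                    {
                        for(size_t kx = 0; kx < k; ++kx)
                        {
                            const long iy = static_cast<long>(y * stride + ky) - static_cast<long>(pad);
                            const long ix = static_cast<long>(x * stride + kx) - static_cast<long>(pad);
                            if(iy >= 0 && ix >= 0 && iy < static_cast<long>(in_h) && ix < static_cast<long>(in_w))
                            {
                                acc += in[(c * in_h + iy) * in_w + ix] * weights[((o * ifm + c) * k + ky) * k + kx];
                            }
                        }
                    }
                }
                out[(o * out_h + y) * out_w + x] = acc;
            }
        }
    }
}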
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h
deleted file mode 100644
index e3dda67a8a..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H
-#define ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the dropout layer kernel.
- *
- * Dropout is used to reduce over-fitting in neural networks.
- *
- */
-class GCDropoutLayerKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCDropoutLayerKernel();
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDropoutLayerKernel(const GCDropoutLayerKernel &) = delete;
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCDropoutLayerKernel &operator=(const GCDropoutLayerKernel &) = delete;
-
- /** Allow instances of this class to be moved */
- GCDropoutLayerKernel(GCDropoutLayerKernel &&) = default;
-
- /** Allow instances of this class to be moved */
- GCDropoutLayerKernel &operator=(GCDropoutLayerKernel &&) = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor for this op. Data types supported: F16/F32
- * @param[out] mask The mask tensor. Data types supported: Same as @p input
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] ratio Dropout ratio
- * @param[in] forward Forward or backward propagation
- *
- */
- void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_mask;
- IGCTensor *_output;
- unsigned int _num_elems_processed_per_iteration;
-};
-}
-
-#endif /*ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H */
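
One common forward-pass formulation of dropout, shown for illustration only; the exact mask generation and scaling used by the deleted GLES shader are not specified in this header, and some formulations additionally scale kept values by 1 / (1 - ratio):

#include <cstddef>
#include <random>

void dropout_forward_sketch(const float *input, float *mask, float *output,
                            size_t count, float ratio, std::mt19937 &rng)
{
    std::uniform_real_distribution<float> dist(0.f, 1.f);
    for(size_t i = 0; i < count; ++i)
    {
        mask[i]   = dist(rng) >= ratio ? 1.f : 0.f; // keep with probability 1 - ratio
        output[i] = input[i] * mask[i];
    }
}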
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
deleted file mode 100644
index 4dd7aa0ec1..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCFILLBORDERKERNEL_H
-#define ARM_COMPUTE_GCFILLBORDERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for filling the border of a kernel */
-class GCFillBorderKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCFillBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCFillBorderKernel(const GCFillBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCFillBorderKernel &operator=(const GCFillBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCFillBorderKernel(GCFillBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCFillBorderKernel &operator=(GCFillBorderKernel &&) = default;
- /** Default destructor */
- ~GCFillBorderKernel() = default;
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in,out] tensor Tensor to process. Data types supported: F16/F32.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
-
- /** Function to set the constant value on fill border kernel depending on type.
- *
- * @param[in] idx Index of the kernel argument to set.
- * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
- */
- template <class T>
- void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
- bool is_parallelisable() const override;
-
-private:
- const IGCTensor *_tensor;
-};
-}
-#endif /*ARM_COMPUTE_GCFILLBORDERKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
deleted file mode 100644
index cbc60da443..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H
-#define ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** OpenGL ES kernel which interleaves the elements of a matrix A in chunks of 4x4
- *
- * This function puts the values of each 4x4 block of Matrix A on the same row (interleaved values)
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccccccccccc}
- * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
- * \end{array} \right)
- * @f]
- *
- * After this operation, the output matrix will have the following shape: [ width * 4, ceil(height / 4.0f) ]
- */
-class GCGEMMInterleave4x4Kernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCGEMMInterleave4x4Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMInterleave4x4Kernel(const GCGEMMInterleave4x4Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMInterleave4x4Kernel &operator=(const GCGEMMInterleave4x4Kernel &) = delete;
- /** Allow instances of this class to be moved */
- GCGEMMInterleave4x4Kernel(GCGEMMInterleave4x4Kernel &&) = default;
- /** Allow instances of this class to be moved */
- GCGEMMInterleave4x4Kernel &operator=(GCGEMMInterleave4x4Kernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: F16, F32
- * @param[out] output Output tensor. Data type supported: same as @p input
- */
- void configure(const IGCTensor *input, IGCTensor *output);
-
- // Inherited methods overridden
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
-};
-}
-#endif /* ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H */
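
For reference, a scalar sketch of the interleave shown above; row-major storage and dimensions that are multiples of 4 are assumed for brevity:

#include <cstddef>

// Each 4x4 block of Matrix A is emitted column by column onto a single
// output row, matching the transformation in the class documentation.
void interleave4x4_reference(const float *in, float *out, size_t width, size_t height)
{
    size_t o = 0;
    for(size_t by = 0; by < height; by += 4)    // block row
    {
        for(size_t bx = 0; bx < width; bx += 4) // block column
        {
            for(size_t x = 0; x < 4; ++x)       // column inside the block
            {
                for(size_t y = 0; y < 4; ++y)   // row inside the block
                {
                    out[o++] = in[(by + y) * width + bx + x];
                }
            }
        }
    }
}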
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
deleted file mode 100644
index 95f991ee73..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H
-#define ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-/** Interface to add a bias to each row of the input tensor
- *
- */
-class GCGEMMMatrixAccumulateBiasesKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCGEMMMatrixAccumulateBiasesKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMMatrixAccumulateBiasesKernel(const GCGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMMatrixAccumulateBiasesKernel &operator=(const GCGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCGEMMMatrixAccumulateBiasesKernel(GCGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCGEMMMatrixAccumulateBiasesKernel &operator=(GCGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in, out] accum The accumulate tensor to update. Data types supported: F16/F32
- * @param[in] biases The shared biases tensor to add. It must be a 1D tensor. Data types supported: Same as @p accum
- */
- void configure(IGCTensor *accum, const IGCTensor *biases);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- IGCTensor *_accum;
- const IGCTensor *_biases;
- gles::NDRange _lws;
-};
-}
-
-#endif /*ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H */
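
For reference, a scalar sketch of the per-row accumulation described above; the flat row-major accumulator is an illustrative layout:

#include <cstddef>

// The same 1D bias vector is added to every row of the accumulator.
void accumulate_biases_reference(float *accum, const float *biases, size_t width, size_t height)
{
    for(size_t y = 0; y < height; ++y)
    {
        for(size_t x = 0; x < width; ++x)
        {
            accum[y * width + x] += biases[x];
        }
    }
}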
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
deleted file mode 100644
index e4157a1327..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H
-#define ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** OpenGL ES kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta.
- * The matrices must have the same dimensions
- *
- * @note This kernel is executed if and only if beta != 0.0.
- */
-class GCGEMMMatrixAdditionKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCGEMMMatrixAdditionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMMatrixAdditionKernel(const GCGEMMMatrixAdditionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMMatrixAdditionKernel &operator=(const GCGEMMMatrixAdditionKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCGEMMMatrixAdditionKernel(GCGEMMMatrixAdditionKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCGEMMMatrixAdditionKernel &operator=(GCGEMMMatrixAdditionKernel &&) = default;
- /** Initialise the kernel's input, output and beta value
- *
- * @note The input and output tensors must have the same dimensions
- *
- * @param[in] input Input tensor (Matrix C). Data types supported: F32
- * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref GCGEMMMatrixMultiplyKernel. Data type supported: same as @p input
- * @param[in] beta Weight of matrix C
- */
- void configure(const IGCTensor *input, IGCTensor *output, float beta);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
-};
-}
-
-#endif /* ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H */
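
For reference, the in-place update this kernel performs once @ref GCGEMMMatrixMultiplyKernel has produced alpha * A * B in the output buffer:

#include <cstddef>

// Scalar reference: output += beta * C, completing alpha * AB + beta * C.
void gemm_matrix_addition_reference(const float *c, float *output, size_t count, float beta)
{
    for(size_t i = 0; i < count; ++i)
    {
        output[i] += beta * c[i];
    }
}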
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
deleted file mode 100644
index 4dcae2e536..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GPUTarget.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** GLES Compute kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha
- *
- * @attention The second input tensor must have at least 2 dimensions (matrix)
- *
- */
-class GCGEMMMatrixMultiplyKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCGEMMMatrixMultiplyKernel();
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMMatrixMultiplyKernel(const GCGEMMMatrixMultiplyKernel &) = delete;
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCGEMMMatrixMultiplyKernel &operator=(const GCGEMMMatrixMultiplyKernel &) = delete;
-
- /** Allow instances of this class to be moved */
- GCGEMMMatrixMultiplyKernel(GCGEMMMatrixMultiplyKernel &&) = default;
-
- /** Allow instances of this class to be moved */
- GCGEMMMatrixMultiplyKernel &operator=(GCGEMMMatrixMultiplyKernel &&) = default;
-
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
- * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- */
- void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref GCGEMMMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel
- * @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- * @param[in] gpu_target GPU Target
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info,
- GPUTarget gpu_target);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input0;
- const IGCTensor *_input1;
- IGCTensor *_output;
-};
-}
-#endif /* ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H */
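
For reference, the computation this removed kernel performed is an alpha-scaled matrix product. A minimal scalar sketch in plain C++ follows; it ignores the interleaved/transposed data layouts and the deleted IGCTensor API, so it illustrates only the mathematical result, not the shader implementation:

#include <cstddef>
#include <vector>

// C = alpha * (A * B): A is M x K, B is K x N, all row-major.
std::vector<float> gemm_reference(const std::vector<float> &a, const std::vector<float> &b,
                                  std::size_t m, std::size_t k, std::size_t n, float alpha)
{
    std::vector<float> c(m * n, 0.f);
    for(std::size_t i = 0; i < m; ++i)
    {
        for(std::size_t j = 0; j < n; ++j)
        {
            float acc = 0.f;
            for(std::size_t p = 0; p < k; ++p)
            {
                acc += a[i * k + p] * b[p * n + j];
            }
            c[i * n + j] = alpha * acc;
        }
    }
    return c;
}
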
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
deleted file mode 100644
index 29a4c8d209..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H
-#define ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** OpenGL ES kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
- *
- * The following example shows how the 1xW transposition works when the input data type is F32:
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccccccccccc}
- * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- *
- * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
- *
- */
-class GCGEMMTranspose1xWKernel : public IGCSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: F16, F32
- * @param[out] output Output tensor. Data type supported: same as @p input
- */
- void configure(const IGCTensor *input, IGCTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-};
-}
-#endif /* ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H */
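
The output shape rule in the note above can be checked with a short standalone computation; for the F32 example (element size 4, so W = 4) a 4x4 input collapses to a single row of 16 values:

#include <cstdio>

int main()
{
    const unsigned int width = 4, height = 4, element_size = 4; // F32 input, as in the example above
    const unsigned int W     = 16 / element_size;               // W = 4
    const unsigned int out_w = height * W;                      // 16
    const unsigned int out_h = (width + W - 1) / W;             // ceil(width / W) = 1
    std::printf("output shape: [ %u, %u ]\n", out_w, out_h);
    return 0;
}
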
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
deleted file mode 100644
index 7d1a53c4c3..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_GCIM2COLKERNEL_H
-#define ARM_COMPUTE_GCIM2COLKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-class Size2D;
-
-/** Interface for the im2col reshape kernel.
- *
- * Rearranges image blocks into columns: each convolution block is stripped out into a single column,
- * which transforms a convolution into a plain matrix multiplication.
- *
- * For example, taking the image below and assuming 3x3 image blocks with a stride of 1, we have:
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * =
- * \left( \begin{array}{ccccccccc}
- * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
- * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
- * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
- * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- */
-class GCIm2ColKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCIm2ColKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCIm2ColKernel(const GCIm2ColKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCIm2ColKernel &operator=(const GCIm2ColKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCIm2ColKernel(GCIm2ColKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCIm2ColKernel &operator=(GCIm2ColKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
-     *                    while every optional dimension from 4 and above represents a batch of inputs. Data types supported: F16/F32
- * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
-     * @param[in] has_bias    If biases are provided, the matrix is expanded with an extra column of 1s.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-    /** Static function to check if given info will lead to a valid configuration of @ref GCIm2ColKernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
-     *                    while every optional dimension from 4 and above represents a batch of inputs. Data types supported: F16/F32
- * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
-     * @param[in] has_bias    If biases are provided, the matrix is expanded with an extra column of 1s.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U));
-
-private:
-    /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input)
-     *
-     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
-     */
-    void run_reduced(const Window &window);
-    /** Run the generic convolution layer input reshape kernel
-     *
-     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
-     */
-    void run_generic(const Window &window);
-
- /** Common signature for the kernel to run */
- using Im2ColFunction = void (GCIm2ColKernel::*)(const Window &);
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
- std::pair<unsigned int, unsigned int> _convolved_dims;
- std::pair<unsigned int, unsigned int> _kernel_dims;
- unsigned int _num_elems_processed_per_iteration;
- Im2ColFunction _run_func;
-};
-}
-
-#endif /*ARM_COMPUTE_GCIM2COLKERNEL_H */
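
The documented example (a 4x4 image, 3x3 blocks, stride 1, no padding) produces the 4x9 matrix shown above; the dimensions follow from the standard convolution output arithmetic:

#include <cstdio>

int main()
{
    const int in_w = 4, in_h = 4, k = 3, stride = 1, pad = 0, ifm = 1;
    const bool has_bias = false;
    const int conv_w = (in_w - k + 2 * pad) / stride + 1; // 2
    const int conv_h = (in_h - k + 2 * pad) / stride + 1; // 2
    const int rows   = conv_w * conv_h;                   // 4 output positions
    const int cols   = k * k * ifm + (has_bias ? 1 : 0);  // 9 values per position
    std::printf("im2col matrix: %d x %d\n", rows, cols);  // 4 x 9, as in the example
    return 0;
}
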
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
deleted file mode 100644
index dd00caecfb..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the normalization layer kernel.
- */
-class GCNormalizationLayerKernel : public IGCKernel
-{
-public:
- /** Constructor */
- GCNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCNormalizationLayerKernel(const GCNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default;
-    /** Default destructor */
- ~GCNormalizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F32.
-     * @param[in]  squared_input Source tensor with each element squared. 3 lower dims represent a single input with dimensions [width, height, IFM].
-     *                           Data types should match the input type.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- */
- void configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
- BorderSize border_size() const override;
-
-private:
- const IGCTensor *_input;
- const IGCTensor *_squared_input;
- IGCTensor *_output;
- BorderSize _border_size;
-};
-}
-#endif /*ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h
deleted file mode 100644
index 5156da8b2c..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCNORMALIZEPLANARYUVLAYERKERNEL_H
-#define ARM_COMPUTE_GCNORMALIZEPLANARYUVLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the NormalizePlanarYUV layer kernel.
- */
-class GCNormalizePlanarYUVLayerKernel : public IGCKernel
-{
-public:
- /** Constructor */
- GCNormalizePlanarYUVLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCNormalizePlanarYUVLayerKernel(const GCNormalizePlanarYUVLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCNormalizePlanarYUVLayerKernel &operator=(const GCNormalizePlanarYUVLayerKernel &) = delete;
- /** Default Move Constructor. */
- GCNormalizePlanarYUVLayerKernel(GCNormalizePlanarYUVLayerKernel &&) = default;
- /** Default move assignment operator */
- GCNormalizePlanarYUVLayerKernel &operator=(GCNormalizePlanarYUVLayerKernel &&) = default;
- /** Default destructor */
- ~GCNormalizePlanarYUVLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: F16.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
-     * @param[in]  std    Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- */
- void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *std);
- /** Static function to check if given info will lead to a valid configuration of @ref GCNormalizePlanarYUVLayerKernel
- *
- * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: F16.
-     * @param[in]  output Destination tensor info. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
- * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
- const IGCTensor *_mean;
- const IGCTensor *_std;
-};
-}
-#endif /*ARM_COMPUTE_GCNORMALIZEPLANARYUVLAYERKERNEL_H */
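
The header does not spell out the formula, but the mean/std parameters imply the usual per-channel normalization out = (in - mean[c]) / std[c]; a scalar sketch under that assumption (the function name and planar layout are illustrative, not the kernel's actual internals):

#include <cstddef>
#include <vector>

// Hypothetical scalar reference for NormalizePlanarYUV; assumes planar
// [width, height, channels] layout and (value - mean) / std per channel.
void normalize_planar_yuv_reference(const std::vector<float> &in, std::vector<float> &out,
                                    const std::vector<float> &mean, const std::vector<float> &std_dev,
                                    std::size_t plane_size) // width * height of one channel
{
    out.resize(in.size());
    for(std::size_t i = 0; i < in.size(); ++i)
    {
        const std::size_t c = i / plane_size; // channel index in a planar layout
        out[i]              = (in[i] - mean[c]) / std_dev[c];
    }
}
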
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
deleted file mode 100644
index 0c4b656175..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H
-#define ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the pixelwise multiplication kernel.
- *
- */
-class GCPixelWiseMultiplicationKernel : public IGCKernel
-{
-public:
- /** Default constructor.*/
- GCPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCPixelWiseMultiplicationKernel &operator=(GCPixelWiseMultiplicationKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input1 An input tensor. Data types supported: F32.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1.
-     * @param[out] output The output tensor. Data types supported: same as @p input1.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- */
- void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input1;
- const IGCTensor *_input2;
- IGCTensor *_output;
-};
-}
-
-#endif /*ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H */
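
The scale constraint documented above (positive, and equal to 1/255 or to 1/2^n with n in [0, 15]) can be expressed directly; the helper name is illustrative:

// Returns true when scale satisfies the documented constraint.
bool is_valid_pixelwise_scale(float scale)
{
    if(scale <= 0.f)
    {
        return false;
    }
    if(scale == 1.f / 255.f)
    {
        return true;
    }
    for(int n = 0; n <= 15; ++n)
    {
        if(scale == 1.f / static_cast<float>(1 << n))
        {
            return true;
        }
    }
    return false;
}
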
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
deleted file mode 100644
index 7a2fb84f34..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-#include "arm_compute/core/Error.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the pooling layer kernel */
-class GCPoolingLayerKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCPoolingLayerKernel(const GCPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCPoolingLayerKernel &operator=(const GCPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCPoolingLayerKernel(GCPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCPoolingLayerKernel &operator=(GCPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~GCPoolingLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
-     * @param[out] indices   (Optional) The indices of the maximal values. Data type supported: U32.
- */
- void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices = nullptr);
-
- /** Static function to check if given info will lead to a valid configuration of @ref GCPoolingLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
-     * @param[in]  indices   (Optional) The indices of the maximal values. Data type supported: U32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
- BorderSize border_size() const override;
-
-private:
- const IGCTensor *_input;
- IGCTensor *_output;
- IGCTensor *_indices;
- PoolingLayerInfo _pool_info;
- BorderSize _border_size;
- unsigned int _num_elems_processed_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H */
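
The pooled output dimensions are carried by PoolingLayerInfo rather than by this header; as a hedged sketch for one spatial axis, assuming the usual floor rounding:

// Hypothetical helper: the real rounding policy (floor/ceil) is a
// PoolingLayerInfo detail and is assumed to be floor here.
inline int pooled_dim(int in, int pool_size, int pad, int stride)
{
    return (in + 2 * pad - pool_size) / stride + 1; // e.g. in=4, pool=2, pad=0, stride=2 -> 2
}
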
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h
deleted file mode 100644
index 754f15cbd8..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCSCALEKERNEL_H
-#define ARM_COMPUTE_GCSCALEKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the scale kernel */
-class GCScaleKernel : public IGCSimple3DKernel
-{
-public:
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @param[in] input Source tensor. Data types supported: F16
- * @param[out] output Destination tensor. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy Interpolation type to use
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- */
- void configure(const IGCTensor *input, IGCTensor *output, InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy = SamplingPolicy::CENTER);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_GCSCALEKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
deleted file mode 100644
index 280efe11f8..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H
-#define ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the identifying the max value of 1D Logits */
-class GCLogits1DMaxKernel : public IGCSimple3DKernel
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const IGCTensor *input, IGCTensor *output);
-};
-
-/** Interface for shifting the logits values around the max value and exponentiating the result */
-class GCLogits1DShiftExpSumKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCLogits1DShiftExpSumKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCLogits1DShiftExpSumKernel(const GCLogits1DShiftExpSumKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCLogits1DShiftExpSumKernel &operator=(const GCLogits1DShiftExpSumKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCLogits1DShiftExpSumKernel(GCLogits1DShiftExpSumKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCLogits1DShiftExpSumKernel &operator=(GCLogits1DShiftExpSumKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
- */
- void configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- const IGCTensor *_max;
- IGCTensor *_output;
- IGCTensor *_sum;
-};
-
-/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
-class GCLogits1DNormKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCLogits1DNormKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCLogits1DNormKernel(const GCLogits1DNormKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCLogits1DNormKernel &operator=(const GCLogits1DNormKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCLogits1DNormKernel(GCLogits1DNormKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCLogits1DNormKernel &operator=(GCLogits1DNormKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32
- * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- const IGCTensor *_sum;
- IGCTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H */
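
Taken together, the three kernels above implement a numerically stable softmax; a scalar reference mirroring the same three stages:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Stage 1: row maximum; stage 2: exp(x - max) and its sum; stage 3: divide
// by the sum. Assumes a non-empty row of logits.
std::vector<float> softmax_1d(const std::vector<float> &logits)
{
    const float max_val = *std::max_element(logits.begin(), logits.end()); // GCLogits1DMaxKernel
    std::vector<float> out(logits.size());
    float sum = 0.f;
    for(std::size_t i = 0; i < logits.size(); ++i)
    {
        out[i] = std::exp(logits[i] - max_val); // GCLogits1DShiftExpSumKernel
        sum += out[i];
    }
    for(float &v : out)
    {
        v /= sum; // GCLogits1DNormKernel
    }
    return out;
}
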
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h
deleted file mode 100644
index 5243e54daf..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCTENSORSHIFTKERNEL_H
-#define ARM_COMPUTE_GCTENSORSHIFTKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-/** Interface for the kernel to shift valid data on a tensor.
- *
- * For example, shifting 3x3 valid data with a padding of 1 to the right:
- * @f[
- * \left( \begin{array}{ccccc}
- * 0 & 0 & 0 & 0 & 0 \\
- * a00 & a01 & a02 & 0 & 0 \\
- * a10 & a11 & a12 & 0 & 0 \\
- * a20 & a21 & a22 & 0 & 0 \\
- * 0 & 0 & 0 & 0 & 0 \\
- * \end{array} \right)
- * =
- * \left( \begin{array}{ccccc}
- * 0 & 0 & 0 & 0 & 0 \\
- * 0 & a00 & a01 & a02 & 0 \\
- * 0 & a10 & a11 & a12 & 0 \\
- * 0 & a20 & a21 & a22 & 0 \\
- * 0 & 0 & 0 & 0 & 0 \\
- * \end{array} \right)
- * @f]
- */
-class GCTensorShiftKernel : public IGCKernel
-{
-public:
- /** Default constructor */
- GCTensorShiftKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCTensorShiftKernel(const GCTensorShiftKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCTensorShiftKernel &operator=(const GCTensorShiftKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCTensorShiftKernel(GCTensorShiftKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCTensorShiftKernel &operator=(GCTensorShiftKernel &&) = default;
- /** Default destructor */
- ~GCTensorShiftKernel() = default;
- /** Set the input of the kernel.
- *
- * @param[in,out] input Source tensor. Data types supported: F16/F32
- */
- void configure(IGCTensor *input);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- IGCTensor *_input;
- gles::NDRange _lws;
- int _left_padding;
-};
-}
-#endif /*ARM_COMPUTE_GCTENSORSHIFTKERNEL_H */
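
The pictorial example above amounts to shifting each row's valid data right by the left padding; a small sketch on a row-major buffer (the function name and parameters are illustrative):

#include <vector>

// Shift the `valid_w` leading values of every row right by `left_padding`
// columns, zero-filling the vacated positions. Copies backwards so the
// source and destination ranges may overlap safely.
void shift_valid_data_right(std::vector<float> &buf, int width, int height, int valid_w, int left_padding)
{
    for(int y = 0; y < height; ++y)
    {
        float *row = buf.data() + y * width;
        for(int x = valid_w - 1; x >= 0; --x)
        {
            row[x + left_padding] = row[x];
        }
        for(int x = 0; x < left_padding; ++x)
        {
            row[x] = 0.f;
        }
    }
}
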
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h
deleted file mode 100644
index a981ae6d1f..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCTRANSPOSEKERNEL_H
-#define ARM_COMPUTE_GCTRANSPOSEKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** OpenGL ES kernel which transposes the elements of a matrix.
- *
- * [width, height, batch] -> [height, width, batch]
- *
- */
-class GCTransposeKernel : public IGCSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: F16/F32
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const IGCTensor *input, IGCTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-};
-}
-#endif /* ARM_COMPUTE_GCTRANSPOSEKERNEL_H */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h
deleted file mode 100644
index 134346b8da..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_GCWEIGHTSRESHAPEKERNEL_H
-#define ARM_COMPUTE_GCWEIGHTSRESHAPEKERNEL_H
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-
-namespace arm_compute
-{
-/** GLES Compute kernel to perform reshaping on the weights used by convolution and locally connected layers
- *
- * Rearranges each 3-dimensional kernel to a single row, leading to a matrix with linearized kernels.
- * In combination with @ref GCIm2ColKernel, it can transform a convolution into a matrix multiplication.
- *
- * For example, assuming a 3D weight kernel of 3x3 dimensions and a depth of 2, we have:
- * @f[
- * \left( \begin{array}{ccc}
- * a000 & a001 & a002 \\
- * a010 & a011 & a012 \\
- * a020 & a021 & a022 \\
- * \end{array} \right)
- * \left( \begin{array}{ccc}
- * a100 & a101 & a102 \\
- * a110 & a111 & a112 \\
- * a120 & a121 & a122 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccc}
- * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
- * \end{array} \right)
- * @f]
- */
-class GCWeightsReshapeKernel : public IGCKernel
-{
-public:
- /** Constructor.*/
- GCWeightsReshapeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCWeightsReshapeKernel(const GCWeightsReshapeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- GCWeightsReshapeKernel &operator=(const GCWeightsReshapeKernel &) = delete;
- /** Allow instances of this class to be moved */
- GCWeightsReshapeKernel(GCWeightsReshapeKernel &&) = default;
- /** Allow instances of this class to be moved */
- GCWeightsReshapeKernel &operator=(GCWeightsReshapeKernel &&) = default;
- /** Default destructor */
- ~GCWeightsReshapeKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, batches] if unshared. Data types supported: F16, F32
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, batches] if unshared. Data types supported: Same as @p input
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
- */
- void configure(const IGCTensor *input, const IGCTensor *biases, IGCTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window) override;
-
-private:
- const IGCTensor *_input;
- const IGCTensor *_biases;
- IGCTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_GCWEIGHTSRESHAPEKERNEL_H */ \ No newline at end of file
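
For the documented example (a 3x3 kernel with depth 2 flattened to one row of 18 values), the reshaped matrix shape follows directly:

#include <cstdio>

int main()
{
    const int kx = 3, ky = 3, ifm = 2, ofm = 1;
    const bool has_bias = false;                            // biases would append one extra value per row
    const int row_len = kx * ky * ifm + (has_bias ? 1 : 0); // 18
    std::printf("reshaped weights: %d x %d\n", row_len, ofm);
    return 0;
}
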
diff --git a/arm_compute/core/GPUTarget.h b/arm_compute/core/GPUTarget.h
index 4959ee5e8a..b107a52d9f 100644
--- a/arm_compute/core/GPUTarget.h
+++ b/arm_compute/core/GPUTarget.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,10 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_GPUTARGET_H
-#define ARM_COMPUTE_GPUTARGET_H
+#ifndef ACL_ARM_COMPUTE_CORE_GPUTARGET_H
+#define ACL_ARM_COMPUTE_CORE_GPUTARGET_H
-#include "arm_compute/core/Helpers.h"
+#include "support/Traits.h"
#include <string>
@@ -33,25 +33,38 @@ namespace arm_compute
/** Available GPU Targets */
enum class GPUTarget
{
- UNKNOWN = 0x101,
- GPU_ARCH_MASK = 0xF00,
- MIDGARD = 0x100,
- BIFROST = 0x200,
- VALHALL = 0x300,
- T600 = 0x110,
- T700 = 0x120,
- T800 = 0x130,
- G71 = 0x210,
- G72 = 0x220,
- G51 = 0x230,
- G51BIG = 0x231,
- G51LIT = 0x232,
- G52 = 0x240,
- G52LIT = 0x241,
- G76 = 0x250,
- G77 = 0x310,
- TBOX = 0x320,
- TODX = 0x330,
+ UNKNOWN = 0x101,
+ GPU_ARCH_MASK = 0xF00,
+ GPU_GENERATION_MASK = 0x0F0,
+ MIDGARD = 0x100,
+ BIFROST = 0x200,
+ VALHALL = 0x300,
+    FIFTHGEN            = 0x400,
+ T600 = 0x110,
+ T700 = 0x120,
+ T800 = 0x130,
+ G71 = 0x210,
+ G72 = 0x220,
+ G51 = 0x221,
+ G51BIG = 0x222,
+ G51LIT = 0x223,
+ G31 = 0x224,
+ G76 = 0x230,
+ G52 = 0x231,
+ G52LIT = 0x232,
+ G77 = 0x310,
+ G57 = 0x311,
+ G78 = 0x320,
+ G68 = 0x321,
+ G78AE = 0x330,
+ G710 = 0x340,
+ G610 = 0x341,
+ G510 = 0x342,
+ G310 = 0x343,
+ G715 = 0x350,
+ G615 = 0x351,
+ G720 = 0x410,
+    G620                = 0x411
};
/** Enable bitwise operations on GPUTarget enumerations */
@@ -104,4 +117,4 @@ inline bool gpu_target_is_in(GPUTarget target_to_check, GPUTarget target)
return target_to_check == target;
}
} // namespace arm_compute
-#endif /* ARM_COMPUTE_GPUTARGET_H */
+#endif // ACL_ARM_COMPUTE_CORE_GPUTARGET_H
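
The reworked enumeration encodes the architecture in the high nibble and the generation within it in the middle nibble, which is what the two masks select; a small standalone illustration (enum subset replicated here only for the example):

#include <cstdio>

enum class GPUTarget : unsigned int // illustrative subset of the enum above
{
    GPU_ARCH_MASK       = 0xF00,
    GPU_GENERATION_MASK = 0x0F0,
    FIFTHGEN            = 0x400,
    G720                = 0x410,
};

int main()
{
    const unsigned int t    = static_cast<unsigned int>(GPUTarget::G720);
    const unsigned int arch = t & static_cast<unsigned int>(GPUTarget::GPU_ARCH_MASK);       // 0x400 == FIFTHGEN
    const unsigned int gen  = t & static_cast<unsigned int>(GPUTarget::GPU_GENERATION_MASK); // 0x010
    std::printf("arch: 0x%X, generation bits: 0x%X\n", arch, gen);
    return 0;
}
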
diff --git a/arm_compute/core/HOGInfo.h b/arm_compute/core/HOGInfo.h
deleted file mode 100644
index 3cc472b274..0000000000
--- a/arm_compute/core/HOGInfo.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_HOGINFO_H
-#define ARM_COMPUTE_HOGINFO_H
-
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-
-namespace arm_compute
-{
-/** Store the HOG's metadata */
-class HOGInfo
-{
-public:
- /** Default constructor */
- HOGInfo();
- /** Default destructor */
- virtual ~HOGInfo() = default;
- /** Allow instances of this class to be copy constructed */
- HOGInfo(const HOGInfo &) = default;
- /** Allow instances of this class to be copied */
- HOGInfo &operator=(const HOGInfo &) = default;
- /** Allow instances of this class to be move constructed */
- HOGInfo(HOGInfo &&) = default;
- /** Allow instances of this class to be moved */
- HOGInfo &operator=(HOGInfo &&) = default;
- /** Constructor
- *
- * @param[in] cell_size Cell size in pixels
- * @param[in] block_size Block size in pixels. Must be a multiple of cell_size.
- * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride.
-     * @param[in] block_stride          Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell_size.
- * @param[in] num_bins Number of histogram bins for each cell
- * @param[in] normalization_type (Optional) Normalization type to use for each block
- * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method
- * @param[in] phase_type (Optional) Type of @ref PhaseType
- */
- HOGInfo(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins,
- HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED);
- /** Initialize the metadata structure with the given parameters
- *
- * @param[in] cell_size Cell size in pixels
- * @param[in] block_size Block size in pixels. Must be a multiple of cell_size.
- * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride.
-     * @param[in] block_stride          Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell_size.
- * @param[in] num_bins Number of histogram bins for each cell
- * @param[in] normalization_type (Optional) Normalization type to use for each block
- * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method
- * @param[in] phase_type (Optional) Type of @ref PhaseType
- */
- void init(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins,
- HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED);
- /** The cell size in pixels
- *
- * @return The cell size in pixels
- */
- const Size2D &cell_size() const;
- /** The block size in pixels
- *
- * @return The block size in pixels
- */
- const Size2D &block_size() const;
- /** The detection window size in pixels
- *
- * @return The detection window size in pixels
- */
- const Size2D &detection_window_size() const;
- /** The block stride in pixels. The block stride is the distance between 2 consecutive blocks
- *
- * @return The block stride in pixels
- */
- const Size2D &block_stride() const;
- /** The number of histogram bins for each cell
- *
- * @return The number of histogram bins for each cell
- */
- size_t num_bins() const;
- /** The normalization type
- *
- * @return The normalization type
- */
- HOGNormType normalization_type() const;
- /** Threshold used for L2HYS_NORM normalization type
- *
- * @return Threshold used for L2HYS_NORM normalization type
- */
- float l2_hyst_threshold() const;
- /** The type of @ref PhaseType
- *
- * @return The type of @ref PhaseType
- */
- PhaseType phase_type() const;
- /** The size of HOG descriptor
- *
- * @return The size of HOG descriptor
- */
- size_t descriptor_size() const;
- /** Calculates the number of cells for each block
- *
- * @return The Size2D data object which stores the number of cells along the x and y directions
- */
- Size2D num_cells_per_block() const;
-
- /** Calculates the number of cells per block stride
- *
- * @return The Size2D data object which stores the number of cells per block stride along the x and y directions
- */
- Size2D num_cells_per_block_stride() const;
- /** Calculates the number of block positions for the given image size
- *
- * @param[in] image_size The input image size data object
- *
- * @return The Size2D data object which stores the number of block positions along the x and y directions
- */
- Size2D num_block_positions_per_image(const Size2D &image_size) const;
-
-private:
- Size2D _cell_size;
- Size2D _block_size;
- Size2D _detection_window_size;
- Size2D _block_stride;
- size_t _num_bins;
- HOGNormType _normalization_type;
- float _l2_hyst_threshold;
- PhaseType _phase_type;
- size_t _descriptor_size;
-};
-}
-#endif /*ARM_COMPUTE_HOGINFO_H */
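
descriptor_size() is not defined in this header; assuming the conventional HOG layout (one num_bins histogram per cell, cells grouped into blocks, blocks tiled across the detection window at block_stride), a hypothetical computation would be:

#include <cstddef>

// Hypothetical sketch only; parameter names follow the constructor above and
// all sizes are assumed to divide evenly, as the documentation requires.
std::size_t hog_descriptor_size(std::size_t win_w, std::size_t win_h,
                                std::size_t block_w, std::size_t block_h,
                                std::size_t stride_x, std::size_t stride_y,
                                std::size_t cell_w, std::size_t cell_h,
                                std::size_t num_bins)
{
    const std::size_t blocks_x        = (win_w - block_w) / stride_x + 1;
    const std::size_t blocks_y        = (win_h - block_h) / stride_y + 1;
    const std::size_t cells_per_block = (block_w / cell_w) * (block_h / cell_h);
    return blocks_x * blocks_y * cells_per_block * num_bins;
}
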
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 09c672ecfa..960201510a 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,23 +24,17 @@
#ifndef ARM_COMPUTE_HELPERS_H
#define ARM_COMPUTE_HELPERS_H
-#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/Steps.h"
-#include "arm_compute/core/Strides.h"
-#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "support/MemorySupport.h"
#include <array>
#include <cstddef>
#include <cstdint>
-#include <memory>
#include <tuple>
-#include <type_traits>
-#include <utility>
namespace arm_compute
{
@@ -48,307 +42,6 @@ class IKernel;
class ITensor;
class ITensorInfo;
-/** Disable bitwise operations by default */
-template <typename T>
-struct enable_bitwise_ops
-{
- static constexpr bool value = false; /**< Disabled */
-};
-
-#ifndef DOXYGEN_SKIP_THIS
-template <typename T>
-typename std::enable_if<enable_bitwise_ops<T>::value, T>::type operator&(T lhs, T rhs)
-{
- using underlying_type = typename std::underlying_type<T>::type;
- return static_cast<T>(static_cast<underlying_type>(lhs) & static_cast<underlying_type>(rhs));
-}
-#endif /* DOXYGEN_SKIP_THIS */
-
-/** Helper function to create and return a unique_ptr pointing to a CL/GLES kernel object.
- * It also calls the kernel's configure() method.
- *
- * @param[in] args All the arguments to pass to the kernel's configuration.
- *
- * @return A unique pointer pointing to a CL/GLES kernel object
- */
-template <typename Kernel, typename... T>
-std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
-{
- std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
- k->configure(std::forward<T>(args)...);
- return k;
-}
-
-/** Helper function to create and return a unique_ptr pointing to a CL/GLES kernel object
- *
- * @return A unique pointer pointing to a kernel object
- */
-template <typename Kernel>
-std::unique_ptr<Kernel> create_kernel()
-{
- std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
- return k;
-}
-
-namespace traits
-{
-/** Check if a type T is contained in a tuple Tuple of types */
-template <typename T, typename Tuple>
-struct is_contained;
-
-template <typename T>
-struct is_contained<T, std::tuple<>> : std::false_type
-{
-};
-
-template <typename T, typename... Ts>
-struct is_contained<T, std::tuple<T, Ts...>> : std::true_type
-{
-};
-
-template <typename T, typename U, typename... Ts>
-struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...>>
-{
-};
-}
-
-/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
- *
- * @note dx and dy must be in the range [0, 1.0]
- *
- * @return The bilinear interpolated pixel value
- */
-template <typename T>
-inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const float dx1 = 1.0f - dx;
- const float dy1 = 1.0f - dy;
-
- const T a00 = *pixel_ptr;
- const T a01 = *(pixel_ptr + 1);
- const T a10 = *(pixel_ptr + stride);
- const T a11 = *(pixel_ptr + stride + 1);
-
- const float w1 = dx1 * dy1;
- const float w2 = dx * dy1;
- const float w3 = dx1 * dy;
- const float w4 = dx * dy;
-
- return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
-}
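// Worked example of the formula above: with dx = dy = 0.5 all four weights
// are 0.25, so the interpolated value is the plain average of the 2x2 patch.
// Standalone check with hypothetical values, independent of the function above:
#include <cassert>

int main()
{
    const float patch[4] = { 1.f, 3.f, 5.f, 7.f }; // 2x2 single-channel patch, stride 2
    const float dx = 0.5f, dy = 0.5f;
    const float dx1 = 1.0f - dx, dy1 = 1.0f - dy;
    const float v = patch[0] * dx1 * dy1 + patch[1] * dx * dy1 + patch[2] * dx1 * dy + patch[3] * dx * dy;
    assert(v == 4.f); // (1 + 3 + 5 + 7) / 4
    return 0;
}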
-
-/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
- * @param[in] iq_info Input QuantizationInfo
- * @param[in] oq_info Output QuantizationInfo
- *
- * @note dx and dy must be in the range [0, 1.0]
- *
- * @return The bilinear interpolated pixel value
- */
-inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const float dx1 = 1.0f - dx;
- const float dy1 = 1.0f - dy;
-
- const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info);
- const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info);
- const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info);
- const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info);
-
- const float w1 = dx1 * dy1;
- const float w2 = dx * dy1;
- const float w3 = dx1 * dy;
- const float w4 = dx * dy;
- float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
- return static_cast<uint8_t>(quantize_qasymm8(res, oq_info));
-}
-
-/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
- * the real coordinates and the integer coordinates immediately preceding them. Input must be QASYMM8_SIGNED and in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
- * @param[in] dx Pixel's distance between the X real coordinate and the preceding X integer coordinate
- * @param[in] dy Pixel's distance between the Y real coordinate and the preceding Y integer coordinate
- * @param[in] iq_info Input QuantizationInfo
- * @param[in] oq_info Output QuantizationInfo
- *
- * @note dx and dy must be in the range [0, 1.0]
- *
- * @return The bilinear interpolated pixel value
- */
-inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const float dx1 = 1.0f - dx;
- const float dy1 = 1.0f - dy;
-
- const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info);
- const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info);
- const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info);
- const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info);
-
- const float w1 = dx1 * dy1;
- const float w2 = dx * dy1;
- const float w3 = dx1 * dy;
- const float w4 = dx * dy;
- float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
- return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info));
-}
-
-/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between
- * the real Y coordinate and the integer Y coordinate immediately preceding it. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom pixel value
- * @param[in] dy Pixel's distance between the Y real coordinate and the preceding Y integer coordinate
- *
- * @note dy must be in the range [0, 1.0]
- *
- * @return The linear interpolated pixel value
- */
-template <typename T>
-inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const float dy1 = 1.0f - dy;
-
- const T a00 = *pixel_ptr;
- const T a10 = *(pixel_ptr + stride);
-
- const float w1 = dy1;
- const float w3 = dy;
-
- return static_cast<T>(a00 * w1 + a10 * w3);
-}
-/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between
- * the real X coordinate and the integer X coordinate immediately preceding it. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input.
- * @param[in] dx Pixel's distance between the X real coordinate and the preceding X integer coordinate
- *
- * @note dx must be in the range [0, 1.0]
- *
- * @return The linear interpolated pixel value
- */
-template <typename T>
-inline T delta_linear_c1_x(const T *pixel_ptr, float dx)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const T a00 = *pixel_ptr;
- const T a01 = *(pixel_ptr + 1);
-
- const float dx1 = 1.0f - dx;
-
- const float w1 = dx1;
- const float w2 = dx;
-
- return static_cast<T>(a00 * w1 + a01 * w2);
-}
-/** Return the pixel at (x,y) using bilinear interpolation.
- *
- * @warning Only works if the iterator was created with an IImage
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input.
- * @param[in] stride Stride in bytes of the image.
- * @param[in] x X position of the wanted pixel
- * @param[in] y Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using bilinear interpolation.
- */
-template <typename T>
-inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y)
-{
- ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
- const int32_t xi = std::floor(x);
- const int32_t yi = std::floor(y);
-
- const float dx = x - xi;
- const float dy = y - yi;
-
- return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy);
-}
-
-/** Return the pixel at (x,y) using bilinear interpolation, clamping coordinates that fall outside the borders. The image must be a single channel input
- *
- * @warning Only works if the iterator was created with an IImage
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image.
- * @param[in] stride Stride in bytes of the image
- * @param[in] width Width of the image
- * @param[in] height Height of the image
- * @param[in] x X position of the wanted pixel
- * @param[in] y Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using bilinear interpolation.
- */
-template <typename T>
-inline uint8_t pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y)
-{
- ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
- x = std::max(-1.f, std::min(x, static_cast<float>(width)));
- y = std::max(-1.f, std::min(y, static_cast<float>(height)));
-
- const float xi = std::floor(x);
- const float yi = std::floor(y);
-
- const float dx = x - xi;
- const float dy = y - yi;
-
- if(dx == 0.0f)
- {
- if(dy == 0.0f)
- {
- return static_cast<T>(first_pixel_ptr[static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride]);
- }
- return delta_linear_c1_y(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dy);
- }
- if(dy == 0.0f)
- {
- return delta_linear_c1_x(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, dx);
- }
- return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy);
-}
-
-/** Return the pixel at (x,y) using area interpolation, clamping coordinates that fall outside the borders. The image must be a single channel U8 image
- *
- * @note The interpolation area depends on the width and height ratio of the input and output images
- * @note Currently the average of the contributing pixels is calculated
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image.
- * @param[in] stride Stride in bytes of the image
- * @param[in] width Width of the image
- * @param[in] height Height of the image
- * @param[in] wr Width ratio between the input image width and the output image width.
- * @param[in] hr Height ratio between the input image height and the output image height.
- * @param[in] x X position of the wanted pixel
- * @param[in] y Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using area interpolation.
- */
-inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y);
-
/** Iterator updated by @ref execute_window_loop for each window element */
class Iterator
{
@@ -362,6 +55,16 @@ public:
*/
Iterator(const ITensor *tensor, const Window &window);
+    /** Create a container iterator for the tensor with the specified number of dimensions, strides, buffer pointer and window.
+ *
+ * @param[in] num_dims The number of dimensions.
+ * @param[in] strides The strides in bytes.
+ * @param[in] buffer The data buffer.
+ * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor.
+ * @param[in] window The window which will be used to iterate over the tensor.
+ */
+ Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window);
+
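
A sketch of what the new raw-buffer constructor enables: iterating a plain 4x2 float array with no ITensor wrapper (strides and window extents are illustrative):

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"

void add_one(float *data)
{
    using namespace arm_compute;
    const Strides strides(sizeof(float), 4 * sizeof(float)); // x/y strides in bytes
    Window win;
    win.set(0, Window::Dimension(0, 4));
    win.set(1, Window::Dimension(0, 2));

    Iterator it(2, strides, reinterpret_cast<uint8_t *>(data), 0, win);
    execute_window_loop(win, [&](const Coordinates &)
    {
        *reinterpret_cast<float *>(it.ptr()) += 1.f;
    }, it);
}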
    /** Increment the iterator along the specified dimension by the step value associated with that dimension.
*
* @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow.
@@ -376,7 +79,7 @@ public:
*
* @return The current position of the iterator in bytes relative to the first element.
*/
- constexpr int offset() const;
+ constexpr size_t offset() const;
/** Return a pointer to the current pixel.
*
@@ -393,18 +96,27 @@ public:
void reset(size_t dimension);
private:
+    /** Initialize a container iterator for the tensor with the specified number of dimensions, strides, buffer pointer and window.
+ *
+ * @param[in] num_dims The number of dimensions.
+ * @param[in] strides The strides in bytes.
+ * @param[in] buffer The data buffer.
+ * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor.
+ * @param[in] window The window which will be used to iterate over the tensor.
+ */
+ void initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window);
+
uint8_t *_ptr;
class Dimension
{
public:
- constexpr Dimension()
- : _dim_start(0), _stride(0)
+ constexpr Dimension() : _dim_start(0), _stride(0)
{
}
- int _dim_start;
- int _stride;
+ size_t _dim_start;
+ size_t _stride;
};
std::array<Dimension, Coordinates::num_max_dimensions> _dims;
@@ -419,180 +131,7 @@ private:
* @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function.
*/
template <typename L, typename... Ts>
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
-
-/** Update window and padding size for each of the access patterns.
- *
- * First the window size is reduced based on all access patterns that are not
- * allowed to modify the padding of the underlying tensor. Then the padding of
- * the remaining tensors is increased to match the window.
- *
- * @param[in] win Window that is used by the kernel.
- * @param[in] patterns Access patterns used to calculate the final window and padding.
- *
- * @return True if the window has been changed. Changes to the padding do not
- * influence the returned value.
- */
-template <typename... Ts>
-bool update_window_and_padding(Window &win, Ts &&... patterns)
-{
- bool window_changed = false;
-
- utility::for_each([&](const IAccessWindow & w)
- {
- window_changed |= w.update_window_if_needed(win);
- },
- patterns...);
-
- bool padding_changed = false;
-
- utility::for_each([&](IAccessWindow & w)
- {
- padding_changed |= w.update_padding_if_needed(win);
- },
- patterns...);
-
- return window_changed;
-}
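
Typical kernel-side use of update_window_and_padding (a sketch against the pre-patch API, using AccessWindowHorizontal from IAccessWindow.h):

#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"

void fit_access_patterns(arm_compute::ITensor *src, arm_compute::ITensor *dst, arm_compute::Window &win)
{
    using namespace arm_compute;
    // 8-element horizontal read/write patterns; padding may grow on both tensors
    AccessWindowHorizontal src_access(src->info(), 0, 8);
    AccessWindowHorizontal dst_access(dst->info(), 0, 8);
    const bool window_changed = update_window_and_padding(win, src_access, dst_access);
    ARM_COMPUTE_UNUSED(window_changed);
}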
-
-/** Calculate the maximum window for a given tensor shape and border setting
- *
- * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
- * @param[in] steps (Optional) Number of elements processed for each step.
- * @param[in] skip_border (Optional) If true exclude the border region from the window.
- * @param[in] border_size (Optional) Border size.
- *
- * @return The maximum window the kernel can be executed on.
- */
-Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
-
-/** Calculate the maximum window for a given tensor shape and border setting
- *
- * @param[in] info Tensor info object defining the shape of the object for which the window is created.
- * @param[in] steps (Optional) Number of elements processed for each step.
- * @param[in] skip_border (Optional) If true exclude the border region from the window.
- * @param[in] border_size (Optional) Border size.
- *
- * @return The maximum window the kernel can be executed on.
- */
-inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
-{
- return calculate_max_window(info.valid_region(), steps, skip_border, border_size);
-}
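
For instance (a sketch; dst is an assumed ITensor), a kernel that processes eight elements per step and leaves a one-pixel border untouched would build its window as:

Window win = calculate_max_window(*dst->info(), Steps(8), true /* skip_border */, BorderSize(1));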
-
-/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
- *
- * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
- * @param[in] steps (Optional) Number of elements processed for each step.
- * @param[in] skip_border (Optional) If true exclude the border region from the window.
- * @param[in] border_size (Optional) Border size. The border region will be excluded from the window.
- *
- * @return The maximum window the kernel can be executed on.
- */
-Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
-
-/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
- *
- * @param[in] info Tensor info object defining the shape of the object for which the window is created.
- * @param[in] steps (Optional) Number of elements processed for each step.
- * @param[in] skip_border (Optional) If true exclude the border region from the window.
- * @param[in] border_size (Optional) Border size.
- *
- * @return The maximum window the kernel can be executed on.
- */
-inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
-{
- return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size);
-}
-
-/** Calculate the maximum window for a given tensor shape and border setting. The window will also include the border.
- *
- * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
- * @param[in] steps (Optional) Number of elements processed for each step.
- * @param[in] border_size (Optional) Border size. The border region will be included in the window.
- *
- * @return The maximum window the kernel can be executed on.
- */
-Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize());
-
-/** Calculate the maximum window for a given tensor shape and border setting. The window will also include the border.
- *
- * @param[in] info Tensor info object defining the shape of the object for which the window is created.
- * @param[in] steps (Optional) Number of elements processed for each step.
- * @param[in] border_size (Optional) Border size. The border region will be included in the window.
- *
- * @return The maximum window the kernel can be executed on.
- */
-inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize())
-{
- return calculate_max_enlarged_window(info.valid_region(), steps, border_size);
-}
-
-/** Intersect multiple valid regions.
- *
- * @param[in] regions Valid regions.
- *
- * @return Intersection of all regions.
- */
-template <typename... Ts>
-ValidRegion intersect_valid_regions(const Ts &... regions)
-{
- auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion
- {
- ValidRegion region;
-
- for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
- {
- region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d]));
- }
-
- for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
- {
- region.shape.set(d, std::min(r1.shape[d], r2.shape[d]));
- }
-
- return region;
- };
-
- return utility::foldl(intersect, regions...);
-}
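
A small worked case of the fold above (sketch): anchors take the element-wise maximum and shapes the minimum.

using namespace arm_compute;
ValidRegion r1(Coordinates(0, 0), TensorShape(16U, 16U));
ValidRegion r2(Coordinates(2, 2), TensorShape(12U, 12U));
ValidRegion r = intersect_valid_regions(r1, r2); // anchor (2, 2), shape (12, 12)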
-
-/** Create a strides object based on the provided strides and the tensor dimensions.
- *
- * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides.
- * @param[in] stride_x Stride to be used in X dimension (in bytes).
- * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes).
- *
- * @return Strides object based on the specified strides. Missing strides are
- * calculated based on the tensor shape and the strides of lower dimensions.
- */
-template <typename T, typename... Ts>
-inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides)
-{
- const TensorShape &shape = info.tensor_shape();
-
- // Create strides object
- Strides strides(stride_x, fixed_strides...);
-
- for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
- {
- strides.set(i, shape[i - 1] * strides[i - 1]);
- }
-
- return strides;
-}
-
-/** Create a strides object based on the tensor dimensions.
- *
- * @param[in] info Tensor info object used to compute the strides.
- *
- * @return Strides object based on element size and tensor shape.
- */
-template <typename... Ts>
-inline Strides compute_strides(const ITensorInfo &info)
-{
- return compute_strides(info, info.element_size());
-}
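
Worked numbers for the two overloads (sketch): a 4x3 F32 tensor has a 4-byte element size, so the missing Y stride is computed as shape[0] * strides[0] = 16 bytes.

using namespace arm_compute;
TensorInfo info(TensorShape(4U, 3U), 1, DataType::F32);
Strides s = compute_strides(info);     // (4, 16)
Strides t = compute_strides(info, 4U); // same strides, stride_x given explicitly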
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators);
/** Permutes given Dimensions according to a permutation vector
*
@@ -605,7 +144,7 @@ template <typename T>
inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm)
{
auto dimensions_copy = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end());
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
T dimension_val = (perm[i] < dimensions.num_dimensions()) ? dimensions_copy[perm[i]] : 0;
dimensions.set(i, dimension_val);
@@ -622,86 +161,13 @@ inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm)
inline void permute(TensorShape &shape, const PermutationVector &perm)
{
TensorShape shape_copy = shape;
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
size_t dimension_val = (perm[i] < shape.num_dimensions()) ? shape_copy[perm[i]] : 1;
- shape.set(i, dimension_val, false); // Avoid changes in _num_dimension
+        shape.set(i, dimension_val, false, false); // Avoid changes in _num_dimensions
}
}
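
For example (sketch), the usual NCHW-to-NHWC reshuffle: output dimension i takes the value of input dimension perm[i].

using namespace arm_compute;
TensorShape shape(8U, 4U, 3U, 1U);             // W H C N (NCHW storage order)
permute(shape, PermutationVector(2U, 0U, 1U)); // -> C W H N = (3, 8, 4, 1)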
-/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
- *
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] shape New shape.
- * @param[in] num_channels New number of channels.
- * @param[in] data_type New data type
- * @param[in] quantization_info (Optional) New quantization info
- *
- * @return True if the tensor info has been initialized
- */
-bool auto_init_if_empty(ITensorInfo &info,
- const TensorShape &shape,
- int num_channels, DataType data_type,
- QuantizationInfo quantization_info = QuantizationInfo());
-
-/** Auto initialize the tensor info using another tensor info.
- *
- * @param[in,out] info_sink   Tensor info used to check and assign
- * @param[in]     info_source Tensor info used to assign
- *
- * @return True if the tensor info has been initialized
- */
-bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source);
-
-/** Set the shape to the specified value if the current assignment is empty.
- *
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] shape New shape.
- *
- * @return True if the shape has been changed.
- */
-bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape);
-
-/** Set the format, data type and number of channels to the specified value if
- * the current data type is unknown.
- *
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] format New format.
- *
- * @return True if the format has been changed.
- */
-bool set_format_if_unknown(ITensorInfo &info, Format format);
-
-/** Set the data type and number of channels to the specified value if
- * the current data type is unknown.
- *
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] data_type New data type.
- *
- * @return True if the data type has been changed.
- */
-bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type);
-
-/** Set the data layout to the specified value if
- * the current data layout is unknown.
- *
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] data_layout New data layout.
- *
- * @return True if the data layout has been changed.
- */
-bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout);
-
-/** Set the quantization info to the specified value if
- * the current quantization info is empty and the data type is an asymmetric quantized type
- *
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] quantization_info Quantization info
- *
- * @return True if the quantization info has been changed.
- */
-bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info);
-
/** Helper function to calculate the Valid Region for Scale.
*
* @param[in] src_info Input tensor info used to check.
@@ -712,8 +178,11 @@ bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantiza
*
* @return The corresponding valid region
*/
-ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape,
- InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined);
+ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info,
+ const TensorShape &dst_shape,
+ InterpolationPolicy interpolate_policy,
+ SamplingPolicy sampling_policy,
+ bool border_undefined);
/** Convert a linear index into n-dimensional coordinates.
*
@@ -733,6 +202,22 @@ inline Coordinates index2coords(const TensorShape &shape, int index);
*/
inline int coords2index(const TensorShape &shape, const Coordinates &coord);
+/** Returns a static map used to find an index or dimension based on a data layout
+ *
+ * *** Layouts ***
+ *
+ * *** 4D ***
+ * [N C H W]
+ * [3 2 1 0]
+ * [N H W C]
+ *
+ * *** 5D ***
+ * [N C D H W]
+ * [4 3 2 1 0]
+ * [N D H W C]
+ */
+const std::map<DataLayout, std::vector<DataLayoutDimension>> &get_layout_map();
+
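
A sketch of querying the map declared above:

const auto &nhwc = arm_compute::get_layout_map().at(arm_compute::DataLayout::NHWC);
// nhwc[0] == DataLayoutDimension::CHANNEL, nhwc[3] == DataLayoutDimension::BATCHES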
/** Get the index of the given dimension.
*
* @param[in] data_layout The data layout.
@@ -740,7 +225,8 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord);
*
* @return The int conversion of the requested data layout index.
*/
-inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension);
+inline size_t get_data_layout_dimension_index(const DataLayout &data_layout,
+ const DataLayoutDimension &data_layout_dimension);
/** Get the DataLayoutDimension of a given index and layout.
*
@@ -749,22 +235,7 @@ inline size_t get_data_layout_dimension_index(const DataLayout data_layout, cons
*
* @return The dimension which this index is requested for.
*/
-inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index);
-
-/** Calculate the normalization dimension index for a given normalization type
- *
- * @param[in] layout Data layout of the input and output tensor
- * @param[in] info Normalization info
- *
- * @return Normalization dimension index
- */
-inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info)
-{
- const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH);
- const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL);
-
- return info.is_in_map() ? width_idx : channel_idx;
-}
+inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index);
/** Calculate the number of output tiles required by the Winograd Convolution layer. This utility function can be used by the Winograd input transform
 * to know the number of tiles on the x and y directions
@@ -776,10 +247,17 @@ inline unsigned int get_normalization_dimension_index(DataLayout layout, const N
*
* @return the number of output tiles along the x and y directions of size "output_tile_size"
*/
-inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info)
+inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims,
+ const Size2D &kernel_size,
+ const Size2D &output_tile_size,
+ const PadStrideInfo &conv_info)
{
- int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
- int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));
+ int num_tiles_x =
+ std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) /
+ static_cast<float>(output_tile_size.width));
+ int num_tiles_y =
+ std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) /
+ static_cast<float>(output_tile_size.height));
// Clamp in case we provide paddings but we have 1D convolution
num_tiles_x = std::min(num_tiles_x, static_cast<int>(in_dims.width));
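
Worked numbers (sketch): a 14x14 input, 3x3 kernel, 2x2 output tiles and one pixel of padding per side gives ceil((14 - 2 + 2) / 2) = 7 tiles in each direction.

const Size2D tiles = compute_winograd_convolution_tiles(
    Size2D(14U, 14U), Size2D(3U, 3U), Size2D(2U, 2U), PadStrideInfo(1, 1, 1, 1)); // (7, 7)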
@@ -808,40 +286,12 @@ inline T wrap_around(T x, T m)
*/
inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value)
{
- for(unsigned int i = 0; i < coords.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < coords.num_dimensions(); ++i)
{
coords[i] = wrap_around(coords[i], max_value);
}
return coords;
}
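
A sketch of wrapping Python-style negative reduction axes onto a 4D tensor:

arm_compute::Coordinates axes(-1, 1);
arm_compute::convert_negative_axis(axes, 4); // -1 wraps around to 3; 1 is unchanged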
-
-/** Given an integer value, this function returns the next power of two
- *
- * @param[in] x Input value
- *
- * @return the next power of two
- */
-inline unsigned int get_next_power_two(unsigned int x)
-{
- // Decrement by 1
- x--;
-
- // Shift right by 1
- x |= x >> 1u;
- // Shift right by 2
- x |= x >> 2u;
- // Shift right by 4
- x |= x >> 4u;
- // Shift right by 8
- x |= x >> 8u;
- // Shift right by 16
- x |= x >> 16u;
-
- // Increment by 1
- x++;
-
- return x;
-}
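
The bit-smearing above sets every bit below the highest set bit before the final increment, so inputs already a power of two map to themselves:

get_next_power_two(17); // 32
get_next_power_two(32); // 32 (unchanged: 31 smears to 31, then +1)
get_next_power_two(1);  // 1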
} // namespace arm_compute
#include "arm_compute/core/Helpers.inl"
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index 233d46bb86..60a21e9418 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,68 +22,19 @@
* SOFTWARE.
*/
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include <cmath>
#include <numeric>
namespace arm_compute
{
-inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y)
-{
- ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
- // Calculate sampling position
- float in_x = (x + 0.5f) * wr - 0.5f;
- float in_y = (y + 0.5f) * hr - 0.5f;
-
- // Get bounding box offsets
- int x_from = std::floor(x * wr - 0.5f - in_x);
- int y_from = std::floor(y * hr - 0.5f - in_y);
- int x_to = std::ceil((x + 1) * wr - 0.5f - in_x);
- int y_to = std::ceil((y + 1) * hr - 0.5f - in_y);
-
- // Clamp position to borders
- in_x = std::max(-1.f, std::min(in_x, static_cast<float>(width)));
- in_y = std::max(-1.f, std::min(in_y, static_cast<float>(height)));
-
- // Clamp bounding box offsets to borders
- x_from = ((in_x + x_from) < -1) ? -1 : x_from;
- y_from = ((in_y + y_from) < -1) ? -1 : y_from;
- x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to;
- y_to = ((in_y + y_to) > height) ? (height - in_y) : y_to;
-
- // Get pixel index
- const int xi = std::floor(in_x);
- const int yi = std::floor(in_y);
-
- // Bounding box elements in each dimension
- const int x_elements = (x_to - x_from + 1);
- const int y_elements = (y_to - y_from + 1);
- ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0);
-
- // Sum pixels in area
- int sum = 0;
- for(int j = yi + y_from, je = yi + y_to; j <= je; ++j)
- {
- const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from;
- sum = std::accumulate(ptr, ptr + x_elements, sum);
- }
-
- // Return average
- return sum / (x_elements * y_elements);
-}
-
template <size_t dimension>
struct IncrementIterators
{
template <typename T, typename... Ts>
- static void unroll(T &&it, Ts &&... iterators)
+ static void unroll(T &&it, Ts &&...iterators)
{
- auto increment = [](T && it)
- {
- it.increment(dimension);
- };
+ auto increment = [](T &&it) { it.increment(dimension); };
utility::for_each(increment, std::forward<T>(it), std::forward<Ts>(iterators)...);
}
static void unroll()
@@ -96,14 +47,14 @@ template <size_t dim>
struct ForEachDimension
{
template <typename L, typename... Ts>
- static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+ static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
{
const auto &d = w[dim - 1];
- for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...))
+ for (auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators<dim - 1>::unroll(iterators...))
{
id.set(dim - 1, v);
- ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...);
+ ForEachDimension<dim - 1>::unroll(w, id, lambda_function, iterators...);
}
}
};
@@ -112,7 +63,7 @@ template <>
struct ForEachDimension<0>
{
template <typename L, typename... Ts>
- static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+ static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
{
ARM_COMPUTE_UNUSED(w, iterators...);
lambda_function(id);
@@ -120,49 +71,60 @@ struct ForEachDimension<0>
};
template <typename L, typename... Ts>
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
{
w.validate();
- for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
{
ARM_COMPUTE_ERROR_ON(w[i].step() == 0);
}
Coordinates id;
- ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), std::forward<Ts>(iterators)...);
+ ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function),
+ std::forward<Ts>(iterators)...);
}
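
The canonical pattern built on top of this (sketch, using the runtime Tensor for brevity): one Iterator per tensor, incremented in lock-step while the lambda visits every coordinate of the window.

#include "arm_compute/runtime/Tensor.h"

void fill_tensor()
{
    using namespace arm_compute;
    Tensor t;
    t.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));
    t.allocator()->allocate();

    Window win;
    win.use_tensor_dimensions(t.info()->tensor_shape());

    Iterator it(&t, win);
    execute_window_loop(win, [&](const Coordinates &id)
    {
        *reinterpret_cast<float *>(it.ptr()) = id.x() + 10.f * id.y();
    }, it);
}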
-inline constexpr Iterator::Iterator()
- : _ptr(nullptr), _dims()
+inline constexpr Iterator::Iterator() : _ptr(nullptr), _dims()
{
}
-inline Iterator::Iterator(const ITensor *tensor, const Window &win)
- : Iterator()
+inline Iterator::Iterator(const ITensor *tensor, const Window &win) : Iterator()
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
ARM_COMPUTE_ERROR_ON(tensor->info() == nullptr);
- const ITensorInfo *info = tensor->info();
- const Strides &strides = info->strides_in_bytes();
+ initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(),
+ tensor->info()->offset_first_element_in_bytes(), win);
+}
+
+inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
+ : Iterator()
+{
+ initialize(num_dims, strides, buffer, offset, win);
+}
+
+inline void
+Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
+{
+ ARM_COMPUTE_ERROR_ON(buffer == nullptr);
- _ptr = tensor->buffer() + info->offset_first_element_in_bytes();
+ _ptr = buffer + offset;
//Initialize the stride for each dimension and calculate the position of the first element of the iteration:
- for(unsigned int n = 0; n < info->num_dimensions(); ++n)
+ for (unsigned int n = 0; n < num_dims; ++n)
{
_dims[n]._stride = win[n].step() * strides[n];
- std::get<0>(_dims)._dim_start += strides[n] * win[n].start();
+ std::get<0>(_dims)._dim_start += static_cast<size_t>(strides[n]) * win[n].start();
}
//Copy the starting point to all the dimensions:
- for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
+ for (unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
{
_dims[n]._dim_start = std::get<0>(_dims)._dim_start;
}
- ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, info->num_dimensions());
+ ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, num_dims);
}
inline void Iterator::increment(const size_t dimension)
@@ -171,13 +133,13 @@ inline void Iterator::increment(const size_t dimension)
_dims[dimension]._dim_start += _dims[dimension]._stride;
- for(unsigned int n = 0; n < dimension; ++n)
+ for (unsigned int n = 0; n < dimension; ++n)
{
_dims[n]._dim_start = _dims[dimension]._dim_start;
}
}
-inline constexpr int Iterator::offset() const
+inline constexpr size_t Iterator::offset() const
{
return _dims.at(0)._dim_start;
}
@@ -193,100 +155,12 @@ inline void Iterator::reset(const size_t dimension)
_dims[dimension]._dim_start = _dims[dimension + 1]._dim_start;
- for(unsigned int n = 0; n < dimension; ++n)
+ for (unsigned int n = 0; n < dimension; ++n)
{
_dims[n]._dim_start = _dims[dimension]._dim_start;
}
}
-inline bool auto_init_if_empty(ITensorInfo &info,
- const TensorShape &shape,
- int num_channels,
- DataType data_type,
- QuantizationInfo quantization_info)
-{
- if(info.tensor_shape().total_size() == 0)
- {
- info.set_data_type(data_type);
- info.set_num_channels(num_channels);
- info.set_tensor_shape(shape);
- info.set_quantization_info(quantization_info);
- return true;
- }
-
- return false;
-}
-
-inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source)
-{
- if(info_sink.tensor_shape().total_size() == 0)
- {
- info_sink.set_data_type(info_source.data_type());
- info_sink.set_num_channels(info_source.num_channels());
- info_sink.set_tensor_shape(info_source.tensor_shape());
- info_sink.set_quantization_info(info_source.quantization_info());
- info_sink.set_data_layout(info_source.data_layout());
- return true;
- }
-
- return false;
-}
-
-inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
-{
- if(info.tensor_shape().total_size() == 0)
- {
- info.set_tensor_shape(shape);
- return true;
- }
-
- return false;
-}
-
-inline bool set_format_if_unknown(ITensorInfo &info, Format format)
-{
- if(info.data_type() == DataType::UNKNOWN)
- {
- info.set_format(format);
- return true;
- }
-
- return false;
-}
-
-inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
-{
- if(info.data_type() == DataType::UNKNOWN)
- {
- info.set_data_type(data_type);
- return true;
- }
-
- return false;
-}
-
-inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout)
-{
- if(info.data_layout() == DataLayout::UNKNOWN)
- {
- info.set_data_layout(data_layout);
- return true;
- }
-
- return false;
-}
-
-inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info)
-{
- if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type())))
- {
- info.set_quantization_info(quantization_info);
- return true;
- }
-
- return false;
-}
-
inline Coordinates index2coords(const TensorShape &shape, int index)
{
int num_elements = shape.total_size();
@@ -294,9 +168,9 @@ inline Coordinates index2coords(const TensorShape &shape, int index)
ARM_COMPUTE_ERROR_ON_MSG(index < 0 || index >= num_elements, "Index has to be in [0, num_elements]!");
ARM_COMPUTE_ERROR_ON_MSG(num_elements == 0, "Cannot create coordinate from empty shape!");
- Coordinates coord{ 0 };
+ Coordinates coord{0};
- for(int d = shape.num_dimensions() - 1; d >= 0; --d)
+ for (int d = shape.num_dimensions() - 1; d >= 0; --d)
{
num_elements /= shape[d];
coord.set(d, index / num_elements);
@@ -315,7 +189,7 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
int index = 0;
int stride = 1;
- for(unsigned int d = 0; d < coord.num_dimensions(); ++d)
+ for (unsigned int d = 0; d < coord.num_dimensions(); ++d)
{
index += coord[d] * stride;
stride *= shape[d];
@@ -324,61 +198,23 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
return index;
}
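
A round-trip sketch for shape (4, 3): linear index 7 decomposes as 3 + 1 * 4.

using namespace arm_compute;
const TensorShape shape(4U, 3U);
Coordinates c = index2coords(shape, 7); // (3, 1)
int i = coords2index(shape, c);         // 7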
-inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
+inline size_t get_data_layout_dimension_index(const DataLayout &data_layout,
+ const DataLayoutDimension &data_layout_dimension)
{
- ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
-
- /* Return the index based on the data layout
- * [N C H W]
- * [3 2 1 0]
- * [N H W C]
- */
- switch(data_layout_dimension)
- {
- case DataLayoutDimension::CHANNEL:
- return (data_layout == DataLayout::NCHW) ? 2 : 0;
- break;
- case DataLayoutDimension::HEIGHT:
- return (data_layout == DataLayout::NCHW) ? 1 : 2;
- break;
- case DataLayoutDimension::WIDTH:
- return (data_layout == DataLayout::NCHW) ? 0 : 1;
- break;
- case DataLayoutDimension::BATCHES:
- return 3;
- break;
- default:
- break;
- }
- ARM_COMPUTE_ERROR("Data layout index not supported!");
+ ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the dimension index for an unknown layout!");
+ const auto &dims = get_layout_map().at(data_layout);
+ const auto &it = std::find(dims.cbegin(), dims.cend(), data_layout_dimension);
+ ARM_COMPUTE_ERROR_ON_MSG(it == dims.cend(), "Invalid dimension for the given layout.");
+ return it - dims.cbegin();
}
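
With the map-backed lookup the results match the old switch, e.g. (sketch):

using namespace arm_compute;
get_data_layout_dimension_index(DataLayout::NHWC, DataLayoutDimension::WIDTH);   // 1
get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::CHANNEL); // 2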
-inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index)
+inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index)
{
- ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
-
- /* Return the index based on the data layout
- * [N C H W]
- * [3 2 1 0]
- * [N H W C]
- */
- switch(index)
- {
- case 0:
- return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::WIDTH : DataLayoutDimension::CHANNEL;
- break;
- case 1:
- return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::HEIGHT : DataLayoutDimension::WIDTH;
- break;
- case 2:
- return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::CHANNEL : DataLayoutDimension::HEIGHT;
- break;
- case 3:
- return DataLayoutDimension::BATCHES;
- break;
- default:
- ARM_COMPUTE_ERROR("Index value not supported!");
- break;
- }
+ ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the layout dimension for an unknown layout!");
+ const auto &dims = get_layout_map().at(data_layout);
+ ARM_COMPUTE_ERROR_ON_MSG(index >= dims.size(), "Invalid index for the given layout.");
+ return dims[index];
}
} // namespace arm_compute
diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h
index 227d1c4bb2..9c9fb90915 100644
--- a/arm_compute/core/IAccessWindow.h
+++ b/arm_compute/core/IAccessWindow.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -100,7 +100,10 @@ public:
* @return a valid region.
*
*/
- virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0;
+ virtual ValidRegion compute_valid_region(const Window &window,
+ ValidRegion input_valid_region,
+ bool border_undefined,
+ BorderSize border_size) const = 0;
};
/** Implementation of a rectangular access pattern. */
@@ -161,7 +164,10 @@ public:
* @param[in] border_undefined (Optional) Undefined borders are excluded from the valid region.
* @param[in] border_size (Optional) Size of the border around the XY-plane of the tensor.
*/
- void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0));
+ void set_valid_region(const Window &window,
+ const ValidRegion &input_valid_region,
+ bool border_undefined = false,
+ const BorderSize &border_size = BorderSize(0));
/** Compute the valid region based on access pattern, valid region of the inputs and border mode.
*
@@ -189,7 +195,10 @@ public:
* @return a valid region.
*
*/
- ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+ ValidRegion compute_valid_region(const Window &window,
+ ValidRegion input_valid_region,
+ bool border_undefined,
+ BorderSize border_size) const override;
bool update_window_if_needed(Window &window) const override;
bool update_padding_if_needed(const Window &window) override;
diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h
index c6a1499698..3471fc9a86 100644
--- a/arm_compute/core/IArray.h
+++ b/arm_compute/core/IArray.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,30 +25,24 @@
#define ARM_COMPUTE_IARRAY_H
#include "arm_compute/core/Error.h"
+
#include <cstddef>
#include <cstdint>
namespace arm_compute
{
-struct KeyPoint;
-struct Coordinates2D;
-struct DetectionWindow;
-class Size2D;
-
/** Array of type T */
template <class T>
class IArray
{
public:
/** Default constructor */
- IArray()
- : _num_values(0), _max_size(0) {};
+ IArray() : _num_values(0), _max_size(0){};
    /** Constructor: initializes an array which can contain up to max_num_values values
     *
     * @param[in] max_num_values Maximum number of values the array will be able to store
*/
- IArray(size_t max_num_values)
- : _num_values(0), _max_size(max_num_values)
+ IArray(size_t max_num_values) : _num_values(0), _max_size(max_num_values)
{
}
/** Maximum number of values which can be stored in this array
@@ -78,7 +72,7 @@ public:
bool push_back(const T &val)
{
ARM_COMPUTE_ERROR_ON(0 == _max_size);
- if(_num_values >= max_num_values())
+ if (_num_values >= max_num_values())
{
_num_values = max_num_values() + 1;
return false;
@@ -135,14 +129,6 @@ private:
size_t _num_values;
size_t _max_size;
};
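
IArray itself is abstract; the usual concrete type is the runtime Array<T> (a sketch, assuming arm_compute/runtime/Array.h and its Int32Array alias as in this revision). Capacity is fixed at construction:

#include "arm_compute/runtime/Array.h"

arm_compute::Int32Array arr(4); // room for four int32 values
arr.push_back(7);
arr.push_back(9);
// arr.num_values() == 2; push_back returns false once capacity would be exceeded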
-/** Interface for Array of Key Points. */
-using IKeyPointArray = IArray<KeyPoint>;
-/** Interface for Array of 2D Coordinates. */
-using ICoordinates2DArray = IArray<Coordinates2D>;
-/** Interface for Array of Detection Windows. */
-using IDetectionWindowArray = IArray<DetectionWindow>;
-/** Interface for Array of 2D Sizes. */
-using ISize2DArray = IArray<Size2D>;
/** Interface for Array of uint8s. */
using IUInt8Array = IArray<uint8_t>;
/** Interface for Array of uint16s. */
@@ -155,5 +141,5 @@ using IInt16Array = IArray<int16_t>;
using IInt32Array = IArray<int32_t>;
/** Interface for Array of floats. */
using IFloatArray = IArray<float>;
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_IARRAY_H */
diff --git a/arm_compute/core/IDevice.h b/arm_compute/core/IDevice.h
index 5cffe646d4..12efa91e19 100644
--- a/arm_compute/core/IDevice.h
+++ b/arm_compute/core/IDevice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 ARM Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,6 @@ enum class DeviceType
{
NEON,
CL,
- GLES
};
/** Interface for device object */
diff --git a/arm_compute/core/IDistribution.h b/arm_compute/core/IDistribution.h
deleted file mode 100644
index cd6f25fd47..0000000000
--- a/arm_compute/core/IDistribution.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IDISTRIBUTION_H
-#define ARM_COMPUTE_IDISTRIBUTION_H
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-/** Interface for distribution objects */
-class IDistribution
-{
-public:
- /** Default virtual destructor */
- virtual ~IDistribution() = default;
- /** Returns the dimensions of the distribution.
- *
- * @note This is fixed to 1-dimensional distribution for now.
- * @return Dimensions of the distribution.
- */
- virtual size_t dimensions() const = 0;
- /** Returns the total size in bytes of the distribution.
- *
- * @return Total size of the distribution in bytes.
- */
- virtual size_t size() const = 0;
- /** Returns a pointer to the start of the distribution.
- * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_bins()
- *
- * @return Pointer to the start of the distribution.
- */
- virtual uint32_t *buffer() const = 0;
- /** Clears the distribution by setting every element to zero. */
- void clear() const;
-};
-}
-#endif /* ARM_COMPUTE_IDISTRIBUTION_H */
diff --git a/arm_compute/core/IDistribution1D.h b/arm_compute/core/IDistribution1D.h
deleted file mode 100644
index 081ba580db..0000000000
--- a/arm_compute/core/IDistribution1D.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IDISTRIBUTION1D_H
-#define ARM_COMPUTE_IDISTRIBUTION1D_H
-
-#include "arm_compute/core/IDistribution.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-/** 1D Distribution interface */
-class IDistribution1D : public IDistribution
-{
-public:
- /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1]
- * defined by a start offset and valid range, divided equally into num_bins parts.
- *
- * @param[in] num_bins The number of bins the distribution is divided in.
- * @param[in] offset The start of the values to use.
- * @param[in] range The total number of the consecutive values of the distribution interval.
- */
- IDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
- /** Returns the number of bins that the distribution has.
- *
- * @return Number of bins of the distribution.
- */
- size_t num_bins() const;
- /** Returns the offset of the distribution.
- *
- * @return Offset of the distribution.
- */
- int32_t offset() const;
- /** Returns the range of the distribution.
- *
- * @return Range of the distribution.
- */
- uint32_t range() const;
- /** Returns the window of the distribution, which is the range divided by the number of bins.
- *
- * @note If the range is not evenly divisible by the number of bins then the window is invalid.
- *
- * @return Window of the distribution.
- */
- uint32_t window() const;
- /** Sets the range of the distribution.
- *
- * @param[in] range New range of the distribution to be set.
- */
- void set_range(uint32_t range);
-
- // Inherited methods overridden:
- size_t size() const override;
- size_t dimensions() const override;
-
-private:
- size_t _num_bins; /**< Number of bins. */
- int32_t _offset; /**< Offset, which indicates the start of the usable values. */
- uint32_t _range; /**< The total number of consecutive values of the distribution interval */
-};
-}
-#endif /* ARM_COMPUTE_IDISTRIBUTION1D_H */
diff --git a/arm_compute/core/IHOG.h b/arm_compute/core/IHOG.h
deleted file mode 100644
index bf8bd73087..0000000000
--- a/arm_compute/core/IHOG.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IHOG_H
-#define ARM_COMPUTE_IHOG_H
-
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-
-namespace arm_compute
-{
-class HOGInfo;
-/** Interface for HOG data-object */
-class IHOG
-{
-public:
- /** Interface to be implemented by the child class to return the HOG's metadata
- *
- * @return A pointer to the HOG's metadata.
- */
- virtual const HOGInfo *info() const = 0;
- /** Default virtual destructor */
- virtual ~IHOG() = default;
- /** Pointer to the first element of the array which stores the linear SVM coefficients of the HOG descriptor
- *
- * @note Other elements of the array can be accessed using descriptor()[idx] for idx=[0, descriptor_size() - 1]
- *
- * @return A pointer to the first element of the array which stores the linear SVM coefficients of the HOG descriptor
- */
- virtual float *descriptor() const = 0;
-};
-}
-#endif /* ARM_COMPUTE_IHOG_H */
diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h
index cb1ddb1d7f..403a2c724e 100644
--- a/arm_compute/core/IKernel.h
+++ b/arm_compute/core/IKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,6 +57,11 @@ public:
* @return The maximum window the kernel can be executed on.
*/
const Window &window() const;
+ /** Function to check if the embedded window of this kernel has been configured
+ *
+ * @return True if the windows has been configured
+ */
+ bool is_window_configured() const;
protected:
/** Configure the kernel's window
@@ -68,5 +73,5 @@ protected:
private:
Window _window;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_IKERNEL_H */
diff --git a/arm_compute/core/ILut.h b/arm_compute/core/ILut.h
deleted file mode 100644
index d1a03af969..0000000000
--- a/arm_compute/core/ILut.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ILUT_H
-#define ARM_COMPUTE_ILUT_H
-
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-
-namespace arm_compute
-{
-/** Lookup Table object interface. */
-class ILut
-{
-public:
- /** Default virtual destructor */
- virtual ~ILut() = default;
- /** Returns the total number of elements in the LUT.
- *
- * @return Total number of elements.
- */
- virtual size_t num_elements() const = 0;
- /** Indicates the offset that needs to be applied to the raw index before performing a lookup in the LUT.
- *
- * @return The normalization offset.
- */
- virtual uint32_t index_offset() const = 0;
- /** Returns the total size in bytes of the LUT.
- *
- * @return Total size of the LUT in bytes.
- */
- virtual size_t size_in_bytes() const = 0;
- /** Returns the type of the LUT.
- *
- * @return The type of the LUT.
- */
- virtual DataType type() const = 0;
- /** Returns a pointer to the start of the LUT.
- * Other elements of the LUT can be accessed using buffer()[idx] for 0 <= idx < num_elements().
- *
- * @return Pointer to the start of the LUT.
- */
- virtual uint8_t *buffer() const = 0;
- /** Clears the LUT by setting every element to zero. */
- virtual void clear() = 0;
-};
-}
-#endif /* ARM_COMPUTE_ILUT_H */
diff --git a/arm_compute/core/IMultiHOG.h b/arm_compute/core/IMultiHOG.h
deleted file mode 100644
index ab79fac154..0000000000
--- a/arm_compute/core/IMultiHOG.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IMULTIHOG_H
-#define ARM_COMPUTE_IMULTIHOG_H
-
-#include "arm_compute/core/IHOG.h"
-
-#include <cstddef>
-
-namespace arm_compute
-{
-/** Interface for storing multiple HOG data-objects */
-class IMultiHOG
-{
-public:
- /** Default destructor */
- virtual ~IMultiHOG() = default;
- /** The number of HOG models stored
- *
- * @return The number of HOG models stored
- */
- virtual size_t num_models() const = 0;
- /** Return a pointer to the requested HOG model
- *
- * @param[in] index The index of the wanted HOG model.
- *
- * @return A pointer to the requested HOG model
- */
- virtual IHOG *model(size_t index) = 0;
- /** Return a const pointer to the requested HOG model
- *
- * @param[in] index The index of the wanted HOG model.
- *
- * @return A const pointer pointed to the HOG model
- */
- virtual const IHOG *model(size_t index) const = 0;
-};
-}
-
-#endif /* ARM_COMPUTE_IMULTIHOG_H */
diff --git a/arm_compute/core/IMultiImage.h b/arm_compute/core/IMultiImage.h
deleted file mode 100644
index 3abdfed8a8..0000000000
--- a/arm_compute/core/IMultiImage.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IMULTIIMAGE_H
-#define ARM_COMPUTE_IMULTIIMAGE_H
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-class MultiImageInfo;
-
-/** Interface for multi-planar images */
-class IMultiImage
-{
-public:
- /** Destructor */
- virtual ~IMultiImage() = default;
- /** Interface to be implemented by the child class to return the multi-planar image's metadata
- *
- * @return A pointer to the image's metadata.
- */
- virtual const MultiImageInfo *info() const = 0;
- /** Return a pointer to the requested plane of the image.
- *
- * @param[in] index The index of the wanted plane.
- *
- * @return A pointer to the plane
- */
- virtual IImage *plane(unsigned int index) = 0;
- /** Return a constant pointer to the requested plane of the image.
- *
- * @param[in] index The index of the wanted plane.
- *
- * @return A constant pointer to the plane
- */
- virtual const IImage *plane(unsigned int index) const = 0;
-};
-}
-#endif /*ARM_COMPUTE_IMULTIIMAGE_H */
diff --git a/arm_compute/core/IPyramid.h b/arm_compute/core/IPyramid.h
deleted file mode 100644
index b2a74656b6..0000000000
--- a/arm_compute/core/IPyramid.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IPYRAMID_H
-#define ARM_COMPUTE_IPYRAMID_H
-
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/PyramidInfo.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-
-namespace arm_compute
-{
-/** Interface for pyramid data-object */
-class IPyramid
-{
-public:
- /** Default virtual destructor */
- virtual ~IPyramid() = default;
- /** Interface to be implemented by the child class to return the Pyramid's metadata
- *
- * @return A pointer to the Pyramid's metadata.
- */
- virtual const PyramidInfo *info() const = 0;
- /** Retrieves a level of the pyramid as an ITensor pointer
- *
- * @param[in] index The index of the level, such that index is less than levels.
- *
- * @return An ITensor pointer
- */
- virtual ITensor *get_pyramid_level(size_t index) const = 0;
-};
-}
-
-#endif /* ARM_COMPUTE_IPYRAMID_H */
diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h
index 501279eb25..aad8313261 100644
--- a/arm_compute/core/ITensor.h
+++ b/arm_compute/core/ITensor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,7 +32,7 @@ namespace arm_compute
{
class Coordinates;
-/** Interface for NEON tensor */
+/** Interface for CPU tensor */
class ITensor
{
public:
@@ -90,11 +90,13 @@ public:
bool is_used() const;
/** Marks a tensor as unused */
void mark_as_unused() const;
+ /** Marks a tensor as used */
+ void mark_as_used() const;
private:
- mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */
+ mutable bool _is_used = {true}; /**< Flag that marks if the tensor is used or not */
};
using IImage = ITensor;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ITENSOR_H */
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index f2b4c155aa..c42f4b57a1 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,18 +28,46 @@
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ICloneable.h"
#include "arm_compute/core/utils/misc/Utility.h"
+#include "support/ICloneable.h"
+
#include <cstddef>
namespace arm_compute
{
+class QuantizationInfo;
+// Note: Any changes to the fields of the class below that have setters should be mirrored
+// (if possible) in the auto_init_if_empty function in AutoConfiguration.h
+
/** Store the tensor's metadata */
class ITensorInfo : public misc::ICloneable<ITensorInfo>
{
public:
+ using TensorDimsState = std::vector<int>;
+ /** An id that uniquely identifies an ITensorInfo within some domain (e.g. a workload)
+ */
+ using Id = int32_t;
+ /** An invalid tensor id within a domain */
+ static constexpr Id invalid_tensor_id = 0;
+ /** Get the value representing dynamic dimension state
+ *
+ * @return Value representing dynamic dimension state
+ *
+ */
+ static constexpr int32_t get_dynamic_state_value()
+ {
+ return _dynamic_dimension;
+ }
+ /** Get the value representing static dimension state
+ *
+ * @return Value representing static dimension state
+ *
+ */
+ static constexpr int32_t get_static_state_value()
+ {
+ return _static_dimension;
+ }
/** Default virtual destructor */
virtual ~ITensorInfo() = default;
/** Set the data type to the specified value.
@@ -81,6 +109,17 @@ public:
* @return Reference to this ITensorInfo object
*/
virtual ITensorInfo &set_tensor_shape(const TensorShape &shape) = 0;
+ /** Set the state for each dimension of the tensor
+ *
+ * This sets the state of each dimension of the shape in terms of dynamic behavior using -1 where appropriate.
+ * The index in the state is a 1 to 1 mapping with the shape dimension index.
+ * For example, to express [?, 3, 3] as a dynamic input, set the state to [-1, 3, 3].
+ *
+ * @param[in] state Tensor dimensions state
+ *
+ * @return Reference to this ITensorInfo object
+ */
+ virtual ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) = 0;
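
A minimal sketch of the new dims-state API, following the [-1, 3, 3] example above (using TensorInfo as the concrete ITensorInfo implementation is an assumption of the sketch):

#include "arm_compute/core/TensorInfo.h"
using namespace arm_compute;

void dims_state_example()
{
    // Express a [?, 3, 3] input whose first dimension is dynamic
    TensorInfo info(TensorShape(1U, 3U, 3U), 1, DataType::F32);
    info.set_tensor_dims_state({ITensorInfo::get_dynamic_state_value(), 3, 3}); // state {-1, 3, 3}
}
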
/** Set the quantization settings (scale and offset) of the tensor.
*
* @param[in] quantization_info QuantizationInfo containing the scale and offset
@@ -107,6 +146,17 @@ public:
* @return True if the strides or the offset to the first element have changed.
*/
virtual bool auto_padding() = 0;
+ /** Set the lock paddings flag of the tensor.
+ * It should be set to true when the tensor can be mapped to a camera or frame buffer.
+ *
+ * @return Reference to this ITensorInfo object
+ */
+ virtual ITensorInfo &set_lock_paddings(bool flag) = 0;
+ /** Get the lock paddings flag value
+ *
+ * @return lock paddings flag value
+ */
+ virtual bool lock_paddings() const = 0;
/** Update the offset to the first element, the strides and the total size.
*
* @note This function can only increase the offset, strides and total size.
@@ -170,6 +220,11 @@ public:
* @return A vector with the size for each dimension of the tensor
*/
virtual const TensorShape &tensor_shape() const = 0;
+ /** State of each dimension of the tensor shape
+ *
+ * @return A vector with the state for each dimension of the tensor, where -1 specifies dynamic dimension
+ */
+ virtual const TensorDimsState &tensor_dims_state() const = 0;
/** Data type used for each element of the tensor
*
* @return Tensor data type
@@ -205,6 +260,11 @@ public:
* @return True if its dynamic else false
*/
virtual bool is_dynamic() const = 0;
+ /** Flag indicating whether the values of the tensor are constant, meaning that they cannot change during kernel/function execution.
+ *
+ * @return True if values are constant else false
+ */
+ virtual bool are_values_constant() const = 0;
/** Set the flag whether the tensor size can be changed.
*
* @param[in] is_resizable Flag that marks the tensor if it can be changed or not.
@@ -212,13 +272,13 @@ public:
* @return Reference to this ITensorInfo object
*/
virtual ITensorInfo &set_is_resizable(bool is_resizable) = 0;
- /** Set the flag whether the tensor size is dynamic.
+ /** Set the flag indicating whether the tensor values are constant, i.e. cannot change during kernel/function execution.
*
- * @param[in] is_dynamic Flag that marks the tensor if it's dynamic.
+ * @param[in] are_values_constant Flag that marks whether the tensor values are constant.
*
* @return Reference to this ITensorInfo object
*/
- virtual ITensorInfo &set_is_dynamic(bool is_dynamic) = 0;
+ virtual ITensorInfo &set_are_values_constant(bool are_values_constant) = 0;
/** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined.
*
* @return The valid region.
@@ -240,7 +300,20 @@ public:
* @return A DataLayout containing the layout data information.
*/
virtual DataLayout data_layout() const = 0;
-
+ /** Get the workload tensor id of the tensor.
+ *
+ * @return Workload tensor id of the tensor
+ */
+ virtual Id id() const = 0;
+ /** Set the tensor id
+ */
+ virtual ITensorInfo &set_id(ITensorInfo::Id id) = 0;
+ /** Check if the tensor id is valid
+ */
+ bool has_valid_id() const
+ {
+ return id() != invalid_tensor_id;
+ }
/** If infos are broadcast compatible tensor infos, return the broadcasted shape and the intersection of
* the broadcasted valid regions of the tensors.
*
@@ -256,23 +329,23 @@ public:
* not broadcast compatible.
*/
template <typename... Infos>
- static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &... infos)
+ static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &...infos)
{
TensorShape bc_shape = TensorShape::broadcast_shape(infos.tensor_shape()...);
- ValidRegion bc_valid_region{ Coordinates(), bc_shape };
+ ValidRegion bc_valid_region{Coordinates(), bc_shape};
- auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo & info)
+ auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo &info)
{
- if(info.num_dimensions() != 0)
+ if (info.num_dimensions() != 0)
{
- for(size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
+ for (size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
{
const bool is_broadcast = (info.tensor_shape()[d] == 1);
const int anchor_max = std::max(bc_valid_region.anchor[d], info.valid_region().anchor[d]);
const size_t valid_min = std::min(bc_valid_region.shape[d], info.valid_region().shape[d]);
- if(!is_broadcast || (valid_min == 0))
+ if (!is_broadcast || (valid_min == 0))
{
bc_valid_region.anchor.set(d, anchor_max);
bc_valid_region.shape.set(d, valid_min);
@@ -285,6 +358,10 @@ public:
return std::pair<TensorShape, ValidRegion>(bc_shape, bc_valid_region);
}
+
+private:
+ static constexpr int32_t _dynamic_dimension = -1;
+ static constexpr int32_t _static_dimension = 0;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_TENSORINFO_H */
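
A short sketch of how broadcast_shape_and_valid_region combines two infos (again assuming TensorInfo as the concrete implementation):

#include "arm_compute/core/TensorInfo.h"
using namespace arm_compute;

void broadcast_example()
{
    TensorInfo a(TensorShape(4U, 1U), 1, DataType::F32);
    TensorInfo b(TensorShape(4U, 3U), 1, DataType::F32);
    // Dimension 1 of `a` has size 1, so it broadcasts against `b`
    auto bc = ITensorInfo::broadcast_shape_and_valid_region(a, b);
    const TensorShape &bc_shape  = bc.first;  // {4, 3}
    const ValidRegion &bc_region = bc.second; // intersection of the valid regions
}
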
diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h
new file mode 100644
index 0000000000..f456c50769
--- /dev/null
+++ b/arm_compute/core/ITensorPack.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2020-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ITENSORPACK_H
+#define ARM_COMPUTE_ITENSORPACK_H
+
+#include "arm_compute/core/experimental/Types.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <unordered_map>
+
+namespace arm_compute
+{
+// Forward declaration
+class ITensor;
+
+/** Tensor packing service */
+class ITensorPack
+{
+public:
+ struct PackElement
+ {
+ PackElement() = default;
+ PackElement(int id, ITensor *tensor) : id(id), tensor(tensor), ctensor(nullptr)
+ {
+ }
+ PackElement(int id, const ITensor *ctensor) : id(id), tensor(nullptr), ctensor(ctensor)
+ {
+ }
+
+ int id{-1};
+ ITensor *tensor{nullptr};
+ const ITensor *ctensor{nullptr};
+ };
+
+public:
+ /** Default Constructor */
+ ITensorPack() = default;
+ /** Initializer list Constructor */
+ ITensorPack(std::initializer_list<PackElement> l);
+ /** Add tensor to the pack
+ *
+ * @param[in] id ID/type of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_tensor(int id, ITensor *tensor);
+
+ /** Add const tensor to the pack
+ *
+ * @param[in] id ID/type of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_tensor(int id, const ITensor *tensor);
+
+ /** Add const tensor to the pack
+ *
+ * @param[in] id ID/type of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_const_tensor(int id, const ITensor *tensor);
+ /** Get tensor of a given id from the pack
+ *
+ * @param[in] id ID of tensor to extract
+ *
+ * @return The pointer to the tensor if it exists and is non-const, else nullptr
+ */
+ ITensor *get_tensor(int id);
+ /** Get constant tensor of a given id
+ *
+ * @param[in] id ID of tensor to extract
+ *
+ * @return The pointer to the tensor if it exists and is const, else nullptr
+ */
+ const ITensor *get_const_tensor(int id) const;
+ /** Remove the tensor stored with the given id
+ *
+ * @param[in] id ID of tensor to remove
+ */
+ void remove_tensor(int id);
+ /** Pack size accessor
+ *
+ * @return Number of tensors registered to the pack
+ */
+ size_t size() const;
+ /** Checks if pack is empty
+ *
+ * @return True if empty else false
+ */
+ bool empty() const;
+
+private:
+ std::unordered_map<int, PackElement> _pack{}; /**< Container with the packed tensors */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_ITENSORPACK_H */
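
A minimal usage sketch of the new pack; the ids are TensorType values from arm_compute/core/experimental/Types.h (which ITensorPack.h includes), and the src/dst tensors are assumed to exist:

#include "arm_compute/core/ITensorPack.h"
using namespace arm_compute;

void pack_example(ITensor &src, ITensor &dst)
{
    ITensorPack pack;
    pack.add_const_tensor(ACL_SRC_0, &src); // read-only input
    pack.add_tensor(ACL_DST, &dst);         // mutable output
    const ITensor *in  = pack.get_const_tensor(ACL_SRC_0);
    ITensor       *out = pack.get_tensor(ACL_DST); // nullptr for const-only entries
}
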
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index de08288dec..168a06a55c 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,113 +21,139 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H
-#define ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H
+#ifndef ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
+#define ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
namespace arm_compute
{
/** Descriptor for FFT scale kernels */
struct FFTScaleKernelInfo
{
- float scale{ 0.f }; /**< Axis to perform the kernel on. */
- bool conjugate{ true }; /**< Flag to conjugate the output/ */
+ float scale{0.f}; /**< Scale factor. */
+ bool conjugate{true}; /**< Flag to conjugate the output. */
};
/** Descriptor for FFT digit reverse kernels */
struct FFTDigitReverseKernelInfo
{
- unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */
- bool conjugate{ false }; /**< Flag to conjugate the output/ */
+ unsigned int axis{0}; /**< Axis to perform the kernel on. */
+ bool conjugate{false}; /**< Flag to conjugate the output. */
};
/** Descriptor used by the FFT core kernels */
struct FFTRadixStageKernelInfo
{
- unsigned int axis{ 0 }; /**< Axis to run the kernel on. */
- unsigned int radix{ 0 }; /**< Radix to use. */
- unsigned int Nx{ 0 }; /**< Nx coefficient. */
- bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
+ unsigned int axis{0}; /**< Axis to run the kernel on. */
+ unsigned int radix{0}; /**< Radix to use. */
+ unsigned int Nx{0}; /**< Nx coefficient. */
+ bool is_first_stage{false}; /**< Flags if the FFT kernel is the first stage of a decomposed FFT. */
};
+class ITensorInfo;
/** Descriptor used by the GEMM kernels */
struct GEMMKernelInfo
{
GEMMKernelInfo() = default;
- GEMMKernelInfo(
- unsigned int im,
- unsigned int in,
- unsigned int ik,
- unsigned int idepth_output_gemm3d,
- bool ireinterpret_input_as_3d,
- bool ibroadcast_bias,
- bool ifp_mixed_precision,
- ActivationLayerInfo iactivation_info,
- int inmult_transpose1xW_width,
- int imult_interleave4x4_height,
- GEMMLHSMatrixInfo ilhs_info,
- GEMMRHSMatrixInfo irhs_info,
- int32_t ina_offset,
- int32_t inb_offset)
- : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision),
- activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info), rhs_info(irhs_info),
- a_offset(ina_offset), b_offset(inb_offset)
+ GEMMKernelInfo(unsigned int im,
+ unsigned int in,
+ unsigned int ik,
+ unsigned int idepth_output_gemm3d,
+ bool ireinterpret_input_as_3d,
+ bool ibroadcast_bias,
+ bool ifp_mixed_precision,
+ bool ihas_pad_y,
+ ActivationLayerInfo iactivation_info,
+ int inmult_transpose1xW_width,
+ int imult_interleave4x4_height,
+ GEMMLHSMatrixInfo ilhs_info,
+ GEMMRHSMatrixInfo irhs_info,
+ int32_t ina_offset,
+ int32_t inb_offset)
+ : m(im),
+ n(in),
+ k(ik),
+ depth_output_gemm3d(idepth_output_gemm3d),
+ reinterpret_input_as_3d(ireinterpret_input_as_3d),
+ broadcast_bias(ibroadcast_bias),
+ fp_mixed_precision(ifp_mixed_precision),
+ has_pad_y(ihas_pad_y),
+ activation_info(iactivation_info),
+ mult_transpose1xW_width(inmult_transpose1xW_width),
+ mult_interleave4x4_height(imult_interleave4x4_height),
+ lhs_info(ilhs_info),
+ rhs_info(irhs_info),
+ a_offset(ina_offset),
+ b_offset(inb_offset)
{
}
- unsigned int m{ 0 }; /**< Number of LHS rows*/
- unsigned int n{ 0 }; /**< Number of RHS columns*/
- unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */
- unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */
- bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */
- bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */
- bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
- ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
- int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */
- int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */
- GEMMLHSMatrixInfo lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
- GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
- int32_t a_offset{ 0 }; /**< Offset to be added to each element of the matrix A */
- int32_t b_offset{ 0 }; /**< Offset to be added to each element of the matrix B */
- GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */
+ unsigned int m{0}; /**< Number of LHS rows*/
+ unsigned int n{0}; /**< Number of RHS columns*/
+ unsigned int k{0}; /**< Number of LHS columns or RHS rows */
+ unsigned int depth_output_gemm3d{0}; /**< Depth of the output tensor in case it is reinterpreted as 3D */
+ bool reinterpret_input_as_3d{false}; /**< Flag used to reinterpret the input as 3D */
+ bool broadcast_bias{false}; /**< Flag used to broadcast the bias addition */
+ bool fp_mixed_precision{false}; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
+ bool has_pad_y{
+ false}; /**< Flag used to indicate if the input/output tensors have internal padding in the y direction */
+ ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
+ int mult_transpose1xW_width{1}; /**< Multiplication factor for the width of the 1xW transposed block */
+ int mult_interleave4x4_height{1}; /**< Multiplication factor for the height of the 4x4 interleaved block */
+ GEMMLHSMatrixInfo
+ lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
+ GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
+ int32_t a_offset{0}; /**< Offset to be added to each element of the matrix A */
+ int32_t b_offset{0}; /**< Offset to be added to each element of the matrix B */
+ GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */
};
-/** Descriptor used by the depthwise convolution kernels */
-struct DWCKernelInfo
+/** Compute descriptor used by the depthwise convolution native kernel */
+struct DWCComputeKernelInfo
{
- ActivationLayerInfo activation_info{}; /**< Activation function to perform after the depthwise convolution */
+ unsigned int n0{1}; /**< Number of columns processed by each thread */
+ unsigned int m0{1}; /**< Number of rows processed by each thread */
+ bool export_input_to_cl_image{false}; /**< Export input to cl_image */
+ bool export_weights_to_cl_image{false}; /**< Export the weights to cl_image */
};
-/** Descriptor used by the depthwise convolution kernels to retrieve the number of output elements processed by each thread */
-struct DWCWeightsKernelInfo
+/** Compute descriptor used by the direct convolution kernel */
+struct DirectConvComputeKernelInfo
{
- unsigned int n0{ 0 }; /**< Number of columns processed by each thread */
+ int32_t m0{1}; /**< Number of rows to be processed by the kernel */
+ int32_t n0{1}; /**< Number of columns to be processed by the kernel */
+ int32_t k0{1}; /**< Number of partial accumulations to be processed in a single iteration by the kernel */
+ bool export_weights_to_cl_image{false}; /**< Flag to export the weights to cl_image */
+ bool export_output_to_cl_image{false}; /**< Flag to export the output to cl_image */
+ bool export_input_to_cl_image{false}; /**< Flag to export the input to cl_image */
};
/** Descriptor used by the softmax kernels */
struct SoftmaxKernelInfo
{
- float beta{ 1.f }; /**< A scaling factor for the exponent with default value 1.0 */
- bool is_log{ false }; /**< Flag used to perform Log Softmax operation */
- DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */
+ float beta{1.f}; /**< A scaling factor for the exponent with default value 1.0 */
+ bool is_log{false}; /**< Flag used to perform Log Softmax operation */
+ DataType input_data_type{DataType::UNKNOWN}; /**< Input tensor data type */
+ int32_t axis{0}; /**< The dimension in which to apply softmax. */
};
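
The two new fields map onto the usual max-subtracted formulation; a sketch of what they parameterize (not the kernel's literal code):

// y_i = exp(beta * (x_i - max_x)) / sum_j exp(beta * (x_j - max_x))   when is_log == false
// y_i = beta * (x_i - max_x) - log(sum_j exp(beta * (x_j - max_x)))   when is_log == true
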
/** Descriptor used by the direct convolution layer output stage kernels */
struct DirectConvolutionLayerOutputStageKernelInfo
{
- int32_t result_fixedpoint_multiplier{ 0 }; /**< Result output stage multiplier used for quantizing */
- int32_t result_shift{ 0 }; /**< Result output stage shift used for quantizing */
- int32_t result_offset_after_shift{ 0 }; /**< Result offset used for quantizing */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+ int32_t result_fixedpoint_multiplier{0}; /**< Result output stage multiplier used for quantizing */
+ int32_t result_shift{0}; /**< Result output stage shift used for quantizing */
+ int32_t result_offset_after_shift{0}; /**< Result offset used for quantizing */
+ DataType output_data_type{
+ DataType::UNKNOWN}; /**< Output tensor data type to use if the output is not initialized */
};
struct InstanceNormalizationLayerKernelInfo
{
/** Default constructor */
- InstanceNormalizationLayerKernelInfo()
- : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
+ InstanceNormalizationLayerKernelInfo() : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
{
}
/** Constructor
@@ -164,10 +190,10 @@ struct GEMMLowpReductionKernelInfo
{
}
- int32_t k{ 0 }; /**< Number of matrix columns/rows */
- bool is_reshaped{ false }; /**< True if the input tensor has been reshaped */
- int32_t scalar{ 0 }; /**< Scalar value to multiply each reduced column/row by */
- bool mul_by_scalar{ false }; /**< True if each column/row reduction has to be multiplied by a scalar value */
+ int32_t k{0}; /**< Number of matrix columns/rows */
+ bool is_reshaped{false}; /**< True if the input tensor has been reshaped */
+ int32_t scalar{0}; /**< Scalar value to multiply each reduced column/row by */
+ bool mul_by_scalar{false}; /**< True if each column/row reduction has to be multiplied by a scalar value */
};
struct ScaleKernelInfo
@@ -180,19 +206,22 @@ struct ScaleKernelInfo
* @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
* @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
* @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
+ * @param[in] data_layout (Optional) Data layout used by the layer. Defaults to @ref DataLayout::UNKNOWN
*/
ScaleKernelInfo(InterpolationPolicy interpolation_policy,
BorderMode border_mode,
PixelValue constant_border_value = PixelValue(),
SamplingPolicy sampling_policy = SamplingPolicy::CENTER,
bool use_padding = true,
- bool align_corners = false)
- : interpolation_policy{ interpolation_policy },
- border_mode{ border_mode },
- constant_border_value{ constant_border_value },
- sampling_policy{ sampling_policy },
- use_padding{ use_padding },
- align_corners{ align_corners }
+ bool align_corners = false,
+ DataLayout data_layout = DataLayout::UNKNOWN) noexcept
+ : interpolation_policy{interpolation_policy},
+ border_mode{border_mode},
+ constant_border_value{constant_border_value},
+ sampling_policy{sampling_policy},
+ use_padding{use_padding},
+ align_corners{align_corners},
+ data_layout{data_layout}
{
}
@@ -202,6 +231,23 @@ struct ScaleKernelInfo
SamplingPolicy sampling_policy; /**< Sampling policy used by the interpolation. */
bool use_padding; /**< Indication of using padding */
bool align_corners; /**< Align corners of input and output */
+ DataLayout data_layout; /**< Data layout to use */
+};
+
+struct MatMulKernelInfo
+{
+ MatMulKernelInfo() = default;
+ MatMulKernelInfo(
+ bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false)
+ : adj_lhs{adj_lhs}, adj_rhs{adj_rhs}, m0{m0}, n0{n0}, k0{k0}, export_rhs_to_cl_image{export_rhs_to_cl_image}
+ {
+ }
+ bool adj_lhs{false}; /**< Adjoint LHS flag */
+ bool adj_rhs{false}; /**< Adjoint RHS flag */
+ int m0{1}; /**< Number of output rows processed by each work-item*/
+ int n0{1}; /**< Number of output columns processed by each work-item*/
+ int k0{1}; /**< Number of inner accumulations */
+ bool export_rhs_to_cl_image{false}; /**< Flag to know whether the RHS tensor should be exported to cl_image*/
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */
+#endif // ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
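
A construction sketch for the new MatMul descriptor, using the constructor added above:

#include "arm_compute/core/KernelDescriptors.h"
using namespace arm_compute;

// Each work-item computes a 4x4 output block with 8 inner accumulations;
// neither operand is adjointed and the RHS is not exported to cl_image.
const MatMulKernelInfo mm_info(/* adj_lhs */ false, /* adj_rhs */ false,
                               /* m0 */ 4, /* n0 */ 4, /* k0 */ 8,
                               /* export_rhs_to_cl_image */ false);
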
diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h
index 1515557f4c..03b861f765 100644
--- a/arm_compute/core/Log.h
+++ b/arm_compute/core/Log.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,11 +34,11 @@
#define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() \
do \
{ \
- if(arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \
+ if (arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \
{ \
arm_compute::logging::LoggerRegistry::get().create_reserved_loggers(); \
} \
- } while(false)
+ } while (false)
#else /* ARM_COMPUTE_LOGGING_ENABLED */
#define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER()
#endif /* ARM_COMPUTE_LOGGING_ENABLED */
@@ -53,7 +53,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \
- } while(false)
+ } while (false)
/** Log a message with format to the core system logger
*
@@ -66,7 +66,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Log a stream to the core system logger
*
@@ -78,7 +78,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \
- } while(false)
+ } while (false)
/** Log information level message to the core system logger
*
@@ -89,7 +89,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \
- } while(false)
+ } while (false)
/** Log information level formatted message to the core system logger
*
@@ -101,7 +101,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, #fmt, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Log information level stream to the core system logger
*
@@ -112,6 +112,6 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \
- } while(false)
+ } while (false)
#endif /* ARM_COMPUTE_LOGGING_MACROS_H */
diff --git a/arm_compute/core/MultiImageInfo.h b/arm_compute/core/MultiImageInfo.h
deleted file mode 100644
index fcd7ba744d..0000000000
--- a/arm_compute/core/MultiImageInfo.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MULTIIMAGEINFO_H
-#define ARM_COMPUTE_MULTIIMAGEINFO_H
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Store the multi-planar image's metadata */
-class MultiImageInfo
-{
-public:
- /** Constructor */
- MultiImageInfo();
- /** Initialize the metadata structure with the given parameters
- *
- * @param[in] width Width of the image (in number of pixels)
- * @param[in] height Height of the image (in number of pixels)
- * @param[in] format Colour format of the image.
- */
- void init(unsigned int width, unsigned int height, Format format);
- /** Colour format of the image
- *
- * @return Colour format of the image
- */
- Format format() const;
- /** Width in pixels
- *
- * @return The width in pixels
- */
- unsigned int width() const;
- /** Height in pixels
- *
- * @return The height in pixels
- */
- unsigned int height() const;
-
-protected:
- unsigned int _width;
- unsigned int _height;
- Format _format;
-};
-}
-#endif /*ARM_COMPUTE_MULTIIMAGEINFO_H */
diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h
deleted file mode 100644
index 5d9c1ec1e2..0000000000
--- a/arm_compute/core/NEON/INESimpleKernel.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H
-#define ARM_COMPUTE_INESIMPLEKERNEL_H
-
-#include "arm_compute/core/CPP/ICPPSimpleKernel.h"
-
-namespace arm_compute
-{
-/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */
-using INESimpleKernel = ICPPSimpleKernel;
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
deleted file mode 100644
index e4f4250d16..0000000000
--- a/arm_compute/core/NEON/NEAsymm.h
+++ /dev/null
@@ -1,760 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEASYMM_H
-#define ARM_COMPUTE_NEASYMM_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-using qasymm8x8_t = uint8x8_t; /**< 8 bit quantized asymmetric vector with 8 elements */
-using qasymm8x8x2_t = uint8x8x2_t; /**< 8 bit quantized asymmetric vector with 16 elements */
-using qasymm8x8x3_t = uint8x8x3_t; /**< 8 bit quantized asymmetric vector with 24 elements */
-using qasymm8x8x4_t = uint8x8x4_t; /**< 8 bit quantized asymmetric vector with 32 elements */
-using qasymm8x16_t = uint8x16_t; /**< 8 bit quantized asymmetric vector with 16 elements */
-
-using qasymm8x8_signed_t = int8x8_t; /**< 8 bit quantized signed asymmetric vector with 8 elements */
-using qasymm8x8x2_signed_t = int8x8x2_t; /**< 8 bit quantized signed asymmetric vector with 16 elements */
-using qasymm8x8x3_signed_t = int8x8x3_t; /**< 8 bit quantized signed asymmetric vector with 24 elements */
-using qasymm8x8x4_signed_t = int8x8x4_t; /**< 8 bit quantized signed asymmetric vector with 32 elements */
-using qasymm8x16_signed_t = int8x16_t; /**< 8 bit quantized signed asymmetric vector with 16 elements */
-
-/** Perform a multiply-accumulate on all 16 components of a QASYMM8 vector
- *
- * vd*vs + vo
- *
- * @param[in] vd Input vector value in QASYMM8 format
- * @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all four lanes.
- * @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all four lanes.
- *
- * @return A 16-component vector in QASYMM8 format, saturated to fit
- */
-uint8x16_t vmlaq_qasymm8(qasymm8x16_t vd, float32x4_t vs, float32x4_t vo);
-
-/** Perform a multiply-accumulate on all 16 components of a QASYMM8_SIGNED vector
- *
- * vd*vs + vo
- *
- * @param[in] vd Input vector value in QASYMM8_SIGNED format
- * @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all four lanes.
- * @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all four lanes.
- *
- * @return A 16-component vector in QASYMM8_SIGNED format, saturated to fit
- */
-int8x16_t vmlaq_qasymm8_signed(qasymm8x16_signed_t vd, float32x4_t vs, float32x4_t vo);
-
-/** Performs final quantization step on 16 elements
- *
- * @tparam is_bounded_relu Specifies if a fused bounded relu should be applied
- *
- * @param in_s32 Input to be quantized.
- * @param result_fixedpoint_multiplier Result multiplier parameter
- * @param result_shift Result shift parameter
- * @param result_offset_after_shift_s32 Result offset parameter
- * @param min_u8 Relu lower bound
- * @param max_u8 Relu upper bound
- *
- * @return Quantized values
- */
-template <bool is_bounded_relu>
-uint8x16_t finalize_quantization(int32x4x4_t &in_s32,
- int result_fixedpoint_multiplier,
- int32_t result_shift,
- int32x4_t result_offset_after_shift_s32,
- uint8x16_t min_u8,
- uint8x16_t max_u8)
-{
- const static int32x4_t zero_s32 = vdupq_n_s32(0);
-
- if(result_shift < 0)
- {
- in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << (-result_shift)));
- in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << (-result_shift)));
- in_s32.val[2] = vmulq_n_s32(in_s32.val[2], (1 << (-result_shift)));
- in_s32.val[3] = vmulq_n_s32(in_s32.val[3], (1 << (-result_shift)));
-
- in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
- in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
- in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);
- in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);
- }
- else
- {
- // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
- in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
- in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
- in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);
- in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);
-
- // Round to the nearest division by a power-of-two using result_shift_s32
- in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift);
- in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift);
- in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift);
- in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift);
- }
-
- // Add the offset terms
- in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32);
- in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32);
- in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32);
- in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32);
-
- // Saturate negative values
- in_s32.val[0] = vmaxq_s32(in_s32.val[0], zero_s32);
- in_s32.val[1] = vmaxq_s32(in_s32.val[1], zero_s32);
- in_s32.val[2] = vmaxq_s32(in_s32.val[2], zero_s32);
- in_s32.val[3] = vmaxq_s32(in_s32.val[3], zero_s32);
-
- // Convert S32 to S16
- const int16x8x2_t in_s16 =
- {
- {
- vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])),
- vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3]))
- }
- };
-
- // Convert S16 to U8
- uint8x16_t out_u8 = vcombine_u8(vqmovun_s16(in_s16.val[0]), vqmovun_s16(in_s16.val[1]));
-
- if(is_bounded_relu)
- {
- out_u8 = vmaxq_u8(out_u8, min_u8);
- out_u8 = vminq_u8(out_u8, max_u8);
- }
-
- return out_u8;
-}
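
Per lane, the vector code above performs gemmlowp-style fixed-point requantization. A scalar model of one lane (a sketch: saturation of the doubling-high multiply and the exact rounding of negative ties are simplified here; the NEON intrinsics are authoritative, and the optional [min_u8, max_u8] relu bounds are omitted):

#include <algorithm>
#include <cstdint>

static inline uint8_t scalar_finalize_quantization(int32_t v, int32_t multiplier, int32_t shift, int32_t offset)
{
    if (shift < 0)
        v *= (1 << (-shift)); // pre-scale path taken when the shift is negative
    // vqrdmulhq_n_s32 per lane: rounding doubling multiply, keep the high 32 bits
    const int64_t prod = 2LL * v * multiplier;
    v = static_cast<int32_t>((prod + (1LL << 30)) >> 31);
    if (shift > 0)
        v = (v + (1 << (shift - 1))) >> shift; // rounding divide by 2^shift
    v += offset;                               // result_offset_after_shift
    return static_cast<uint8_t>(std::max(0, std::min(255, v))); // clamp to U8 range
}
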
-
-/** Performs final quantization step on 16 elements
- *
- * @tparam is_bounded_relu Specifies if a fused bounded relu should be applied
- *
- * @param in_s32 Input to be quantized.
- * @param result_fixedpoint_multiplier Result multiplier parameter
- * @param result_shift Result shift parameter
- * @param result_offset_after_shift_s32 Result offset parameter
- * @param min_s8 Relu lower bound
- * @param max_s8 Relu upper bound
- *
- * @return Quantized values
- */
-template <bool is_bounded_relu>
-int8x16_t finalize_quantization(int32x4x4_t &in_s32,
- int result_fixedpoint_multiplier,
- int32_t result_shift,
- int32x4_t result_offset_after_shift_s32,
- int8x16_t min_s8,
- int8x16_t max_s8)
-{
- if(result_shift < 0)
- {
- in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << (-result_shift)));
- in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << (-result_shift)));
- in_s32.val[2] = vmulq_n_s32(in_s32.val[2], (1 << (-result_shift)));
- in_s32.val[3] = vmulq_n_s32(in_s32.val[3], (1 << (-result_shift)));
-
- in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
- in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
- in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);
- in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);
- }
- else
- {
- // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
- in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
- in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
- in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);
- in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);
-
- // Round to the nearest division by a power-of-two using result_shift_s32
- in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift);
- in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift);
- in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift);
- in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift);
- }
-
- // Add the offset terms
- in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32);
- in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32);
- in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32);
- in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32);
-
- // Convert S32 to S16
- const int16x8x2_t in_s16 =
- {
- {
- vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])),
- vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3]))
- }
- };
-
- // Convert S16 to S8
- int8x16_t out_s8 = vcombine_s8(vqmovn_s16(in_s16.val[0]), vqmovn_s16(in_s16.val[1]));
-
- if(is_bounded_relu)
- {
- out_s8 = vmaxq_s8(out_s8, min_s8);
- out_s8 = vminq_s8(out_s8, max_s8);
- }
-
- return out_s8;
-}
-
-/** Performs final quantization step on 16 elements for symmetric quantization
- *
- * @tparam is_bounded_relu Specifies if a fused bounded relu should be applied
- *
- * @param in_s32 Input to be quantized.
- * @param result_fixedpoint_multiplier Result multiplier parameter
- * @param result_shift Result shift parameter
- * @param result_offset_after_shift_s32 Result offset parameter
- * @param min_s8 Relu lower bound
- * @param max_s8 Relu upper bound
- *
- * @return Quantized values
- */
-template <bool is_bounded_relu>
-inline int8x16_t finalize_quantization_symm(int32x4x4_t &in_s32,
- const int32x4x4_t &result_fixedpoint_multiplier,
- const int32x4x4_t &result_shift,
- const int32x4_t &result_offset_after_shift_s32,
- const int8x16_t &min_s8,
- const int8x16_t &max_s8)
-{
- const static int32x4_t one_s32 = vdupq_n_s32(1);
-
- // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
- int32x4x4_t res_shift_gt0 =
- {
- vqrdmulhq_s32(in_s32.val[0], result_fixedpoint_multiplier.val[0]),
- vqrdmulhq_s32(in_s32.val[1], result_fixedpoint_multiplier.val[1]),
- vqrdmulhq_s32(in_s32.val[2], result_fixedpoint_multiplier.val[2]),
- vqrdmulhq_s32(in_s32.val[3], result_fixedpoint_multiplier.val[3]),
- };
- // Round to the nearest division by a power-of-two using result_shift_s32
- res_shift_gt0.val[0] = rounding_divide_by_pow2(res_shift_gt0.val[0], result_shift.val[0]);
- res_shift_gt0.val[1] = rounding_divide_by_pow2(res_shift_gt0.val[1], result_shift.val[1]);
- res_shift_gt0.val[2] = rounding_divide_by_pow2(res_shift_gt0.val[2], result_shift.val[2]);
- res_shift_gt0.val[3] = rounding_divide_by_pow2(res_shift_gt0.val[3], result_shift.val[3]);
-
- int32x4x4_t res_shift_lt0 =
- {
- vmulq_s32(in_s32.val[0], vshlq_s32(one_s32, vnegq_s32(result_shift.val[0]))),
- vmulq_s32(in_s32.val[1], vshlq_s32(one_s32, vnegq_s32(result_shift.val[1]))),
- vmulq_s32(in_s32.val[2], vshlq_s32(one_s32, vnegq_s32(result_shift.val[2]))),
- vmulq_s32(in_s32.val[3], vshlq_s32(one_s32, vnegq_s32(result_shift.val[3]))),
- };
- res_shift_lt0.val[0] = vqrdmulhq_s32(res_shift_lt0.val[0], result_fixedpoint_multiplier.val[0]);
- res_shift_lt0.val[1] = vqrdmulhq_s32(res_shift_lt0.val[1], result_fixedpoint_multiplier.val[1]);
- res_shift_lt0.val[2] = vqrdmulhq_s32(res_shift_lt0.val[2], result_fixedpoint_multiplier.val[2]);
- res_shift_lt0.val[3] = vqrdmulhq_s32(res_shift_lt0.val[3], result_fixedpoint_multiplier.val[3]);
-
- // Select result depending on shift value
- const uint32x4x4_t mask_lt0 =
- {
-#ifdef __aarch64__
- vcltzq_s32(result_shift.val[0]),
- vcltzq_s32(result_shift.val[1]),
- vcltzq_s32(result_shift.val[2]),
- vcltzq_s32(result_shift.val[3]),
-#else //__aarch64__
- vcltq_s32(result_shift.val[0], vdupq_n_s32(0)),
- vcltq_s32(result_shift.val[1], vdupq_n_s32(0)),
- vcltq_s32(result_shift.val[2], vdupq_n_s32(0)),
- vcltq_s32(result_shift.val[3], vdupq_n_s32(0)),
-#endif //__aarch64__
- };
-
- in_s32.val[0] = vbslq_s32(mask_lt0.val[0], res_shift_lt0.val[0], res_shift_gt0.val[0]);
- in_s32.val[1] = vbslq_s32(mask_lt0.val[1], res_shift_lt0.val[1], res_shift_gt0.val[1]);
- in_s32.val[2] = vbslq_s32(mask_lt0.val[2], res_shift_lt0.val[2], res_shift_gt0.val[2]);
- in_s32.val[3] = vbslq_s32(mask_lt0.val[3], res_shift_lt0.val[3], res_shift_gt0.val[3]);
-
- // Add the offset terms
- in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32);
- in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32);
- in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32);
- in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32);
-
- // Convert S32 to S16
- const int16x8x2_t in_s16 =
- {
- {
- vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])),
- vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3]))
- }
- };
-
- // Convert S16 to S8
- int8x16_t out_s8 = vcombine_s8(vqmovn_s16(in_s16.val[0]), vqmovn_s16(in_s16.val[1]));
-
- if(is_bounded_relu)
- {
- out_s8 = vmaxq_s8(out_s8, min_s8);
- out_s8 = vminq_s8(out_s8, max_s8);
- }
-
- return out_s8;
-}
-
-/** Performs final quantization step on single element
- *
- * @tparam is_bounded_relu Specifies if a fused bounded relu should be applied
- *
- * @param[in] in_value Input to be quantized.
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter
- * @param[in] result_shift Result shift parameter
- * @param[in] result_offset_after_shift_s32 Result offset parameter
- * @param[in] min_u8 Relu lower bound
- * @param[in] max_u8 Relu upper bound
- *
- * @return Quantized value
- */
-template <bool is_bounded_relu>
-inline uint8_t finalize_quantization(int32_t in_value, int result_fixedpoint_multiplier,
- int32_t result_shift, int32_t result_offset_after_shift_s32,
- uint8_t min_u8, uint8_t max_u8)
-{
- int32x4_t in_s32 = vdupq_n_s32(in_value);
-
- if(result_shift < 0)
- {
- in_value = vgetq_lane_s32(vqrdmulhq_n_s32(vmulq_n_s32(in_s32, (1 << (-result_shift))), result_fixedpoint_multiplier), 0);
- }
- else
- {
- // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
- in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0);
- // Shift value by result_shift_s32
- in_value = rounding_divide_by_pow2(in_value, result_shift);
- }
-
- // Add the offset term
- in_value += result_offset_after_shift_s32;
-
- // Bound the result
- uint8_t out_u8 = static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, in_value)));
- if(is_bounded_relu)
- {
- out_u8 = static_cast<uint8_t>(std::max(min_u8, std::min(max_u8, out_u8)));
- }
-
- return out_u8;
-}
-
-/** Performs final quantization step on single element
- *
- * @tparam is_bounded_relu Specifies if a fused bounded relu should be applied
- *
- * @param[in] in_value Input to be quantized.
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter
- * @param[in] result_shift Result shift parameter
- * @param[in] result_offset_after_shift_s32 Result offset parameter
- * @param[in] min_s8 Relu lower bound
- * @param[in] max_s8 Relu upper bound
- *
- * @return Quantized value
- */
-template <bool is_bounded_relu>
-inline int8_t finalize_quantization(int32_t in_value, int result_fixedpoint_multiplier,
- int32_t result_shift, int32_t result_offset_after_shift_s32,
- int8_t min_s8, int8_t max_s8)
-{
- int32x4_t in_s32 = vdupq_n_s32(in_value);
-
- if(result_shift < 0)
- {
- in_value = vgetq_lane_s32(vqrdmulhq_n_s32(vmulq_n_s32(in_s32, (1 << (-result_shift))), result_fixedpoint_multiplier), 0);
- }
- else
- {
- // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
- in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0);
-
- // Shift value by result_shift_s32
- in_value = rounding_divide_by_pow2(in_value, result_shift);
- }
-
- // Add the offset term
- in_value += result_offset_after_shift_s32;
-
- // Bound the result
- int8_t out_s8 = static_cast<int8_t>(std::max<int32_t>(-128, std::min<int32_t>(127, in_value)));
- if(is_bounded_relu)
- {
- out_s8 = static_cast<int8_t>(std::max(min_s8, std::min(max_s8, out_s8)));
- }
-
- return out_s8;
-}
-
-/** Dequantize a neon vector holding 8 quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const int32x4_t voffset = vdupq_n_s32(offset);
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x2_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(qv)))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(qv)))), voffset)), vscale),
- }
- };
- return vdequantized_input;
-}
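
Each lane above widens the quantized value and applies the affine mapping; a scalar equivalent (sketch):

#include "arm_compute/core/QuantizationInfo.h"
#include <cstdint>

static inline float scalar_dequantize(uint8_t q, const arm_compute::UniformQuantizationInfo &qi)
{
    // f = scale * (q - offset): exactly what each lane computes above
    return qi.scale * (static_cast<int32_t>(q) - qi.offset);
}
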
-
-/** Dequantize a neon vector holding 8 signed quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x2_t vdequantize(const int8x8_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const int32x4_t voffset = vdupq_n_s32(offset);
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x2_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(qv))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(qv))), voffset)), vscale),
- }
- };
- return vdequantized_input;
-}
-
-/** Dequantize a neon vector holding 16 quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x4_t vdequantize(const uint8x16_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const int32x4_t voffset = vdupq_n_s32(offset);
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x4_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),
- }
- };
- return vdequantized_input;
-}
-
-/** Dequantize a neon vector holding 16 signed quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x4_t vdequantize(const int8x16_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const int32x4_t voffset = vdupq_n_s32(offset);
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x4_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
- }
- };
- return vdequantized_input;
-}
-
-/** Dequantize a neon vector holding 16 quantized values, following an asymmetric quantization scheme.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] scale Quantization scaling factor.
- * @param[in] offset Zero quantization offset.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offset)
-{
- const int32x4_t voffset = vdupq_n_s32(offset);
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x4_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),
- }
- };
- return vdequantized_input;
-}
-
-/** Dequantize a vector of 16 values stored as signed asymmetric.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] scale Quantization scaling factor.
- * @param[in] offset Zero quantization offset.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale, int32_t offset)
-{
- const int32x4_t voffset = vdupq_n_s32(offset);
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x4_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
- }
- };
- return vdequantized_input;
-}
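Each of the vdequantize overloads above applies the same per-lane affine mapping, real = scale * (quantized - offset); the vmovl_s8/vmovl_s16 chains merely widen each 8-bit lane to 32 bits before the arithmetic. A minimal scalar sketch of one lane (hypothetical helper, not part of this header):

#include <cstdint>

// Scalar reference for one lane: real = scale * (q - offset).
inline float dequantize_lane(int8_t q, float scale, int32_t offset)
{
    return scale * static_cast<float>(static_cast<int32_t>(q) - offset); // e.g. scale=0.5f, offset=10, q=14 -> 2.0f
}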
-
-/** Dequantize following a symmetric quantization scheme a neon vector holding 16 quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] vscale Vector containing quantization scaling factors.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x4_t vdequantize(const int8x16_t &qv, const float32x4x4_t vscale)
-{
- const float32x4x4_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[0]),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[1]),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[2]),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[3]),
- }
- };
- return vdequantized_input;
-}
-
-/** Dequantize following a symmetric quantization scheme a neon vector holding 16 quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] scale Quantization scaling factor.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale)
-{
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x4_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale),
- }
- };
- return vdequantized_input;
-}
-
-/** Quantize a neon vector holding 8 floating point values.
- *
- * @param[in] qv Input values to be quantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return A neon vector holding the quantized values
- */
-inline uint8x8_t vquantize(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const float32x4_t voffset = vdupq_n_f32(offset);
- const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
- const int32x4x4_t rf =
- {
- {
-#ifdef __aarch64__
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
-#else //__aarch64__
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
-#endif //__aarch64__
- }
- };
- return vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
-}
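Note the rounding difference behind the __aarch64__ guard here and in the overloads below: vcvtnq_s32_f32 rounds to nearest with ties to even, while the armv7 fallback vcvtq_s32_f32 truncates toward zero, so the two paths can differ by one LSB. A scalar sketch of the two behaviours (hypothetical helpers, assuming the default FE_TONEAREST rounding mode):

#include <cmath>
#include <cstdint>

inline int32_t convert_a64(float x) { return static_cast<int32_t>(std::nearbyint(x)); } // like vcvtnq_s32_f32
inline int32_t convert_a32(float x) { return static_cast<int32_t>(x); }                 // like vcvtq_s32_f32 (truncation)
// e.g. x = 2.7f: convert_a64 -> 3, convert_a32 -> 2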
-
-/** Quantize a neon vector holding 8 floating point values.
- *
- * @param[in] qv Input values to be quantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return A neon vector holding the signed quantized values
- */
-inline int8x8_t vquantize_signed(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const float32x4_t voffset = vdupq_n_f32(offset);
- const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
- const int32x4x4_t rf =
- {
- {
-#ifdef __aarch64__
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
-#else //__aarch64__
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
-#endif //__aarch64__
- }
- };
- return vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
-}
-
-/** Quantize a neon vector holding 16 floating point values.
- *
- * @param[in] qv Input values to be quantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return A neon vector holding the quantized values
- */
-inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const float32x4_t voffset = vdupq_n_f32(offset);
- const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
- const int32x4x4_t rf =
- {
- {
-#ifdef __aarch64__
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
-#else //__aarch64__
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
-#endif //__aarch64__
- }
- };
- const uint8x8_t pa = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
- const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
- return vcombine_u8(pa, pb);
-}
-
-/** Signed quantize a neon vector holding 16 floating point values.
- *
- * @param[in] qv Input values to be quantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return A neon vector holding the quantized values
- */
-inline int8x16_t vquantize_signed(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const float32x4_t voffset = vdupq_n_f32(offset);
- const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
- const int32x4x4_t rf =
- {
- {
-#ifdef __aarch64__
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
-#else //__aarch64__
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
-#endif //__aarch64__
- }
- };
- const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
- const int8x8_t pb = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
- return vcombine_s8(pa, pb);
-}
-
-/** Quantize to QASYMM16 a neon vector holding 16 floating point values.
- *
- * @param[in] qv Input values to be quantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return A neon vector holding the quantized values
- */
-inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const int offset = qi.offset;
- const float32x4_t voffset = vdupq_n_f32(offset);
- const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
- const int32x4x4_t rf =
- {
- {
-#ifdef __aarch64__
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
- vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
-#else //__aarch64__
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
- vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
-#endif //__aarch64__
- }
- };
- const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1]));
- const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3]));
- return { pa, pb };
-}
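A minimal round-trip sketch for the quantize/dequantize pair, assuming this header is on the include path and that UniformQuantizationInfo is constructible from (scale, offset):

#include <arm_neon.h>
#include "arm_compute/core/NEON/NEAsymm.h"

void quantize_roundtrip(const float *src, float *dst)
{
    const arm_compute::UniformQuantizationInfo qi(0.25f, 128); // example scale and offset
    float32x4x4_t vin;
    for(int i = 0; i < 4; ++i)
    {
        vin.val[i] = vld1q_f32(src + 4 * i);
    }
    const uint8x16_t    q = arm_compute::vquantize(vin, qi);  // float -> QASYMM8
    const float32x4x4_t d = arm_compute::vdequantize(q, qi);  // back to float, at quantized precision
    for(int i = 0; i < 4; ++i)
    {
        vst1q_f32(dst + 4 * i, d.val[i]);
    }
}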
-} // namespace arm_compute
-#include "arm_compute/core/NEON/NEAsymm.inl"
-#endif // ARM_COMPUTE_NEASYMM_H
diff --git a/arm_compute/core/NEON/NEAsymm.inl b/arm_compute/core/NEON/NEAsymm.inl
deleted file mode 100644
index 71205e0403..0000000000
--- a/arm_compute/core/NEON/NEAsymm.inl
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-namespace arm_compute
-{
-inline qasymm8x16_t vmlaq_qasymm8(qasymm8x16_t vd, float32x4_t vs, float32x4_t vo)
-{
- // Convert uint8 vectors to uint16 vectors
- const uint8x8_t vd_low = vget_low_u8(vd);
- const uint8x8_t vd_high = vget_high_u8(vd);
- uint16x8_t vd_low_u16x8 = vmovl_u8(vd_low);
- uint16x8_t vd_high_u16x8 = vmovl_u8(vd_high);
- // Convert uint16 vectors to uint32 vectors
- uint32x4_t A_u32x4 = vmovl_u16(vget_low_u16(vd_low_u16x8));
- uint32x4_t B_u32x4 = vmovl_u16(vget_high_u16(vd_low_u16x8));
- uint32x4_t C_u32x4 = vmovl_u16(vget_low_u16(vd_high_u16x8));
- uint32x4_t D_u32x4 = vmovl_u16(vget_high_u16(vd_high_u16x8));
- // Convert uint32 vectors to float32 vectors
- float32x4_t A_f32x4 = vcvtq_f32_u32(A_u32x4);
- float32x4_t B_f32x4 = vcvtq_f32_u32(B_u32x4);
- float32x4_t C_f32x4 = vcvtq_f32_u32(C_u32x4);
- float32x4_t D_f32x4 = vcvtq_f32_u32(D_u32x4);
- // vd = vd*vs + vo
- A_f32x4 = vmlaq_f32(vo, A_f32x4, vs);
- B_f32x4 = vmlaq_f32(vo, B_f32x4, vs);
- C_f32x4 = vmlaq_f32(vo, C_f32x4, vs);
- D_f32x4 = vmlaq_f32(vo, D_f32x4, vs);
- // Convert float32 vectors to uint32 vectors
- A_u32x4 = vcvtq_u32_f32(A_f32x4);
- B_u32x4 = vcvtq_u32_f32(B_f32x4);
- C_u32x4 = vcvtq_u32_f32(C_f32x4);
- D_u32x4 = vcvtq_u32_f32(D_f32x4);
- // Convert uint32 vectors to uint16 vectors (with saturation)
- vd_low_u16x8 = vcombine_u16(vqmovn_u32(A_u32x4), vqmovn_u32(B_u32x4));
- vd_high_u16x8 = vcombine_u16(vqmovn_u32(C_u32x4), vqmovn_u32(D_u32x4));
- // convert uint16 vectors to uint8 vectors (with saturation)
- return vcombine_u8(vqmovn_u16(vd_low_u16x8), vqmovn_u16(vd_high_u16x8));
-}
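vmlaq_qasymm8 therefore computes vd * vs + vo per lane in float and saturates the result back to uint8. A usage sketch (example multiplier and addend; assumes NEAsymm.h is included and that qasymm8x16_t aliases uint8x16_t, as the header defines):

#include <arm_neon.h>
#include "arm_compute/core/NEON/NEAsymm.h"

void rescale_qasymm8(const uint8_t *src, uint8_t *dst)
{
    const uint8x16_t  vd = vld1q_u8(src);     // 16 QASYMM8 values
    const float32x4_t vs = vdupq_n_f32(1.5f); // multiplier (example value)
    const float32x4_t vo = vdupq_n_f32(10.f); // addend (example value)
    vst1q_u8(dst, arm_compute::vmlaq_qasymm8(vd, vs, vo));
}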
-inline qasymm8x16_signed_t vmlaq_qasymm8_signed(qasymm8x16_signed_t vd, float32x4_t vs, float32x4_t vo)
-{
- // Convert int8 vectors to int16 vectors
- const int8x8_t vd_low = vget_low_s8(vd);
- const int8x8_t vd_high = vget_high_s8(vd);
- int16x8_t vd_low_s16x8 = vmovl_s8(vd_low);
- int16x8_t vd_high_s16x8 = vmovl_s8(vd_high);
- // Convert int16 vectors to int32 vectors
- int32x4_t A_s32x4 = vmovl_s16(vget_low_s16(vd_low_s16x8));
- int32x4_t B_s32x4 = vmovl_s16(vget_high_s16(vd_low_s16x8));
- int32x4_t C_s32x4 = vmovl_s16(vget_low_s16(vd_high_s16x8));
- int32x4_t D_s32x4 = vmovl_s16(vget_high_s16(vd_high_s16x8));
- // Convert int32 vectors to float32 vectors
- float32x4_t A_f32x4 = vcvtq_f32_s32(A_s32x4);
- float32x4_t B_f32x4 = vcvtq_f32_s32(B_s32x4);
- float32x4_t C_f32x4 = vcvtq_f32_s32(C_s32x4);
- float32x4_t D_f32x4 = vcvtq_f32_s32(D_s32x4);
- // vd = vd*vs + vo
- A_f32x4 = vmlaq_f32(vo, A_f32x4, vs);
- B_f32x4 = vmlaq_f32(vo, B_f32x4, vs);
- C_f32x4 = vmlaq_f32(vo, C_f32x4, vs);
- D_f32x4 = vmlaq_f32(vo, D_f32x4, vs);
- // Convert float32 vectors to int32 vectors
- A_s32x4 = vcvtq_s32_f32(A_f32x4);
- B_s32x4 = vcvtq_s32_f32(B_f32x4);
- C_s32x4 = vcvtq_s32_f32(C_f32x4);
- D_s32x4 = vcvtq_s32_f32(D_f32x4);
- // Convert int32 vectors to int16 vectors (with saturation)
- vd_low_s16x8 = vcombine_s16(vqmovn_s32(A_s32x4), vqmovn_s32(B_s32x4));
- vd_high_s16x8 = vcombine_s16(vqmovn_s32(C_s32x4), vqmovn_s32(D_s32x4));
- // convert int16 vectors to int8 vectors (with saturation)
- return vcombine_s8(vqmovn_s16(vd_low_s16x8), vqmovn_s16(vd_high_s16x8));
-}
-} // namespace arm_compute
diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl
deleted file mode 100644
index 2cf52e58d2..0000000000
--- a/arm_compute/core/NEON/NEColorConvertHelper.inl
+++ /dev/null
@@ -1,1045 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IMultiImage.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/Utils.h"
-
-#include <arm_neon.h>
-
-namespace
-{
-#ifndef DOXYGEN_SKIP_THIS
-constexpr float red_coef_bt709 = 1.5748f;
-constexpr float green_coef_bt709 = -0.1873f;
-constexpr float green_coef2_bt709 = -0.4681f;
-constexpr float blue_coef_bt709 = 1.8556f;
-
-constexpr float rgb2yuv_bt709_kr = 0.2126f;
-constexpr float rgb2yuv_bt709_kb = 0.0722f;
-// K_g = 1 - K_r - K_b
-constexpr float rgb2yuv_bt709_kg = 0.7152f;
-// C_u = 1 / (2 * (1 - K_b))
-constexpr float rgb2yuv_bt709_cu = 0.5389f;
-// C_v = 1 / (2 * (1 - K_r))
-constexpr float rgb2yuv_bt709_cv = 0.6350f;
-
-constexpr float rgb2u8_red_coef = 0.2126f;
-constexpr float rgb2u8_green_coef = 0.7152f;
-constexpr float rgb2u8_blue_coef = 0.0722f;
-
-inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor, const float32x4_t &gcolor, const float32x4_t &bcolor,
- const float rcoef, const float gcoef, const float bcoef)
-{
- float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef);
- greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef);
- greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef);
- return greyscale;
-}
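The BT.709 luma weights used below sum to 1, so a uniform grey input maps to itself. The scalar equivalent of one lane:

inline float rgb_to_grey_lane(float r, float g, float b)
{
    return 0.2126f * r + 0.7152f * g + 0.0722f * b; // e.g. (100, 100, 100) -> 100.0f
}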
-
-inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out)
-{
- float32x4x4_t out_float32;
-
- // Convert the three RGB uint8x16_t vectors to three float32x4x4_t
- const float32x4x4_t r_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[0]);
- const float32x4x4_t g_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[1]);
- const float32x4x4_t b_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[2]);
-
- // New greyscale value = (RED_COEFF * R) + (GREEN_COEFF * G) + (BLUE_COEFF * B)
- // Compute the greyscale lanes from the three RGB float vectors
- out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0],
- rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
-
- out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1],
- rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
-
- out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2],
- rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
-
- out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3],
- rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
-
- // Convert the greyscale float32x4x4_t back to a single uint8x16_t
- arm_compute::convert_float32x4x4_to_uint8x16(out_float32, out);
-}
-
-inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
- float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
-{
- /*
- Y'= 0.2126*R' + 0.7152*G' + 0.0722*B'
- U'=-0.1146*R' - 0.3854*G' + 0.5000*B'
- V'= 0.5000*R' - 0.4542*G' - 0.0458*B'
- */
- const auto c128 = vdupq_n_f32(128.f);
-
- // Y = R * K_r + G * (1 - K_r - K_b) + B * K_b
- yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
- yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
- yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
-
- // U = (B - Y) / (2 * (1 - K_b))
- uvec = vsubq_f32(bvec, yvec);
- uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
-
- // V = (R - Y) / (2 * (1 - K_r))
- vvec = vsubq_f32(rvec, yvec);
- vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
-}
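For reference, the same computation on a single pixel, with the 128 chroma offset folded in exactly as the vmlaq_n_f32 calls do:

inline void rgb_to_yuv_pixel(float r, float g, float b, float &y, float &u, float &v)
{
    y = 0.2126f * r + 0.7152f * g + 0.0722f * b; // Y = Kr*R + Kg*G + Kb*B
    u = 128.f + 0.5389f * (b - y);               // U = 128 + Cu*(B - Y)
    v = 128.f + 0.6350f * (r - y);               // V = 128 + Cv*(R - Y)
}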
-
-inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
- float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
-{
- float32x4x3_t rgb1, rgb2;
-
- // Compute: cb - 128 and cr - 128;
- const auto c128 = vdupq_n_f32(128.f);
- uvec_val = vsubq_f32(uvec_val, c128);
- vvec_val = vsubq_f32(vvec_val, c128);
-
- // Compute:
- // r = 0.0000f*f_u + 1.5748f*f_v;
- // g = -0.1873f*f_u - 0.4681f*f_v;
- // b = 1.8556f*f_u + 0.0000f*f_v;
- const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
- const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
- const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
- vmulq_n_f32(vvec_val, green_coef2_bt709));
-
- // Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
- // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t
- // and written back to memory using vst3 instruction
-
- rgb1.val[0] = vaddq_f32(yvec_val, red);
- rgb1.val[1] = vaddq_f32(yvec_val, green);
- rgb1.val[2] = vaddq_f32(yvec_val, blue);
-
- rgb2.val[0] = vaddq_f32(yyvec_val, red);
- rgb2.val[1] = vaddq_f32(yyvec_val, green);
- rgb2.val[2] = vaddq_f32(yyvec_val, blue);
-
- uint8x8x3_t u8_rgb;
- arm_compute::convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
-
- if(!alpha)
- {
- vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
- vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
- vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
- vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
- vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
- vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
- vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
- vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
- }
- else
- {
- uint8x8x4_t u8_rgba;
- u8_rgba.val[0] = u8_rgb.val[0];
- u8_rgba.val[1] = u8_rgb.val[1];
- u8_rgba.val[2] = u8_rgb.val[2];
- u8_rgba.val[3] = vdup_n_u8(255);
- vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
- vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
- vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
- vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
- vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
- vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
- vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
- vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
- }
-}
-
-inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
-{
- uint8x16x3_t rgb;
-
- if(alpha)
- {
- const auto tmp = vld4q_u8(ptr);
- rgb.val[0] = tmp.val[0];
- rgb.val[1] = tmp.val[1];
- rgb.val[2] = tmp.val[2];
- }
- else
- {
- rgb = vld3q_u8(ptr);
- }
-
- return rgb;
-}
-
-inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
-{
- // Convert the uint8x16_t to float32x4x4_t
- const float32x4x4_t frvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[0]);
- const float32x4x4_t fgvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[1]);
- const float32x4x4_t fbvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[2]);
-
- const float32x4x4_t frvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[0]);
- const float32x4x4_t fgvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[1]);
- const float32x4x4_t fbvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[2]);
-
- float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
- float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
-
- for(auto i = 0; i < 4; ++i)
- {
- rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
- fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
- rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
- fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
- }
-
- arm_compute::convert_float32x4x4_to_uint8x16(fyvec_top, vec_top.val[0]);
- arm_compute::convert_float32x4x4_to_uint8x16(fuvec_top, vec_top.val[1]);
- arm_compute::convert_float32x4x4_to_uint8x16(fvvec_top, vec_top.val[2]);
- arm_compute::convert_float32x4x4_to_uint8x16(fyvec_bottom, vec_bottom.val[0]);
- arm_compute::convert_float32x4x4_to_uint8x16(fuvec_bottom, vec_bottom.val[1]);
- arm_compute::convert_float32x4x4_to_uint8x16(fvvec_bottom, vec_bottom.val[2]);
-}
-
-inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
- const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
- unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
- unsigned char *const __restrict out_uv)
-{
- uint8x16x3_t vec_top, vec_bottom;
- vec_top.val[0] = rvec_top;
- vec_top.val[1] = gvec_top;
- vec_top.val[2] = bvec_top;
- vec_bottom.val[0] = rvec_bottom;
- vec_bottom.val[1] = gvec_bottom;
- vec_bottom.val[2] = bvec_bottom;
-
- rgb_to_yuv_conversion(vec_top, vec_bottom);
-
- vst1q_u8(out_y_top, vec_top.val[0]);
- vst1q_u8(out_y_bottom, vec_bottom.val[0]);
-
- const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
- const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
- const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
- const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
-
- uint8x8x2_t uvvec;
- uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
- uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
-
- vst2_u8(out_uv, uvvec);
-}
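The vuzpq/vrhaddq/vhadd sequence above averages each 2x2 block of chroma samples (rounding in the horizontal step, truncating in the vertical one) to build NV12's half-resolution interleaved UV plane. Up to that rounding detail, the per-output-sample scalar equivalent is:

#include <cstdint>

inline uint8_t chroma_2x2_average(uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br)
{
    return static_cast<uint8_t>((static_cast<unsigned>(tl) + tr + bl + br) / 4);
}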
-
-inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
- const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
- unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
- unsigned char *const __restrict out_u,
- unsigned char *const __restrict out_v)
-{
- uint8x16x3_t vec_top, vec_bottom;
- vec_top.val[0] = rvec_top;
- vec_top.val[1] = gvec_top;
- vec_top.val[2] = bvec_top;
- vec_bottom.val[0] = rvec_bottom;
- vec_bottom.val[1] = gvec_bottom;
- vec_bottom.val[2] = bvec_bottom;
-
- rgb_to_yuv_conversion(vec_top, vec_bottom);
-
- vst1q_u8(out_y_top, vec_top.val[0]);
- vst1q_u8(out_y_bottom, vec_bottom.val[0]);
-
- const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
- const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
- const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
- vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
-
- vst1_u8(out_u, vget_low_u8(uvvec));
- vst1_u8(out_v, vget_high_u8(uvvec));
-}
-
-inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
- unsigned char *const __restrict out_y,
- unsigned char *const __restrict out_u,
- unsigned char *const __restrict out_v)
-{
- // Convert the uint8x16_t to float32x4x4_t
- const float32x4x4_t frvec = arm_compute::convert_uint8x16_to_float32x4x4(rvec);
- const float32x4x4_t fgvec = arm_compute::convert_uint8x16_to_float32x4x4(gvec);
- const float32x4x4_t fbvec = arm_compute::convert_uint8x16_to_float32x4x4(bvec);
-
- float32x4x4_t fyvec, fuvec, fvvec;
- for(auto i = 0; i < 4; ++i)
- {
- rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
- fyvec.val[i], fuvec.val[i], fvvec.val[i]);
- }
-
- uint8x16_t yvec, uvec, vvec;
- arm_compute::convert_float32x4x4_to_uint8x16(fyvec, yvec);
- arm_compute::convert_float32x4x4_to_uint8x16(fuvec, uvec);
- arm_compute::convert_float32x4x4_to_uint8x16(fvvec, vvec);
-
- vst1q_u8(out_y, yvec);
- vst1q_u8(out_u, uvec);
- vst1q_u8(out_v, vvec);
-}
-#endif /* DOXYGEN_SKIP_THIS */
-}
-
-namespace arm_compute
-{
-/** Convert RGB to RGBX.
- *
- * @param[in] input Input RGB data buffer.
- * @param[out] output Output RGBX buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IImage *__restrict>(output);
-
- Iterator in(input_ptr, win);
- Iterator out(output_ptr, win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta1 = vld3q_u8(in.ptr());
- uint8x16x4_t ta2;
- ta2.val[0] = ta1.val[0];
- ta2.val[1] = ta1.val[1];
- ta2.val[2] = ta1.val[2];
- ta2.val[3] = vdupq_n_u8(255);
- vst4q_u8(out.ptr(), ta2);
- },
- in, out);
-}
-
-/** Convert RGB to U8.
- *
- * @param[in] input Input RGB data buffer.
- * @param[out] output Output U8 buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IImage *__restrict>(output);
-
- Iterator in(input_ptr, win);
- Iterator out(output_ptr, win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta1 = vld3q_u8(in.ptr());
- uint8x16_t ta2;
- rgb_to_u8_conversion(ta1, ta2);
- vst1q_u8(out.ptr(), ta2);
- },
- in, out);
-}
-
-/** Convert RGBX to RGB.
- *
- * @param[in] input Input RGBX data buffer.
- * @param[out] output Output RGB buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-void colorconvert_rgbx_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IImage *__restrict>(output);
-
- Iterator in(input_ptr, win);
- Iterator out(output_ptr, win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta1 = vld4q_u8(in.ptr());
- uint8x16x3_t ta2;
- ta2.val[0] = ta1.val[0];
- ta2.val[1] = ta1.val[1];
- ta2.val[2] = ta1.val[2];
- vst3q_u8(out.ptr(), ta2);
- },
- in, out);
-}
-
-/** Convert YUYV to RGB.
- *
- * @param[in] input Input YUYV data buffer.
- * @param[out] output Output RGB buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool yuyv, bool alpha>
-void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IImage *__restrict>(output);
-
- constexpr auto element_size = alpha ? 32 : 24;
- constexpr auto shift = yuyv ? 0 : 1;
-
- Iterator in(input_ptr, win);
- Iterator out(output_ptr, win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta = vld4q_u8(in.ptr());
- //ta.val[0] = Y0 Y2 Y4 Y6 ...
- //ta.val[1] = U0 U2 U4 U6 ...
- //ta.val[2] = Y1 Y3 Y5 Y7 ...
- //ta.val[3] = V0 V2 V4 V6 ...
-
- // Convert the uint8x16x4_t to float32x4x4_t
- const float32x4x4_t yvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[0 + shift]);
- const float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[1 - shift]);
- const float32x4x4_t yyvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[2 + shift]);
- const float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[3 - shift]);
-
- yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
- },
- in, out);
-}
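The shift template parameter is what distinguishes the two packings: after vld4q_u8, YUYV leaves the luma in lanes 0/2 and the chroma in lanes 1/3, while UYVY is offset by one. A compile-time sketch of the lane selection (hypothetical helpers):

#include <cstddef>

constexpr std::size_t y_even_lane(bool yuyv) { return 0 + (yuyv ? 0 : 1); }
constexpr std::size_t y_odd_lane(bool yuyv)  { return 2 + (yuyv ? 0 : 1); }
constexpr std::size_t u_lane(bool yuyv)      { return 1 - (yuyv ? 0 : 1); }
constexpr std::size_t v_lane(bool yuyv)      { return 3 - (yuyv ? 0 : 1); }

static_assert(u_lane(true) == 1 && u_lane(false) == 0, "YUYV keeps U in lane 1, UYVY in lane 0");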
-
-/** Convert NV12 to RGB.
- *
- * @param[in] input Input NV12 data buffer.
- * @param[out] output Output RGB buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool uv, bool alpha>
-void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
- const auto output_ptr = static_cast<IImage *__restrict>(output);
-
- constexpr auto element_size = alpha ? 32 : 24;
- const auto out_stride = output_ptr->info()->strides_in_bytes().y();
- constexpr auto shift = uv ? 0 : 1;
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in_y(input_ptr->plane(0), win);
- Iterator in_uv(input_ptr->plane(1), win_uv);
- Iterator out(output_ptr, win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_y_top = vld2q_u8(in_y.ptr());
- const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
- const auto ta_uv = vld2q_u8(in_uv.ptr());
- //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
- //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
- //ta_uv.val[0] = U0 U2 U4 U6 ...
- //ta_uv.val[1] = V0 V2 V4 V6 ...
-
- // Convert the uint8x16_t vectors to float32x4x4_t
- float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
- float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
- float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
- float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
- float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]);
- float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]);
-
- yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
-
- yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
- },
- in_y, in_uv, out);
-}
-
-/** Convert IYUV to RGB.
- *
- * @param[in] input Input IYUV data buffer.
- * @param[out] output Output RGB buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool alpha>
-void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
- const auto output_ptr = static_cast<IImage *__restrict>(output);
-
- constexpr auto element_size = alpha ? 32 : 24;
- const auto out_stride = output_ptr->info()->strides_in_bytes().y();
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in_y(input_ptr->plane(0), win);
- Iterator in_u(input_ptr->plane(1), win_uv);
- Iterator in_v(input_ptr->plane(2), win_uv);
- Iterator out(output_ptr, win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto *y_top_ptr = in_y.ptr();
- const auto *y_bottom_ptr = in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y();
- const auto *u_ptr = in_u.ptr();
- const auto *v_ptr = in_v.ptr();
-
- // Work-around for an issue in gcc (>= 9) where vld2q can cause register-allocation problems
-#if defined(__aarch64__)
- const auto ta0_y_top = vld1q_u8(y_top_ptr);
- const auto ta1_y_top = vld1q_u8(y_top_ptr + 16);
- const auto ta0_y_bottom = vld1q_u8(y_bottom_ptr);
- const auto ta1_y_bottom = vld1q_u8(y_bottom_ptr + 16);
- const auto ta_u = vld1q_u8(u_ptr);
- const auto ta_v = vld1q_u8(v_ptr);
-
- // Convert the uint8x16_t vectors to float32x4x4_t
- float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_top, ta1_y_top));
- float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_top, ta1_y_top));
- float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_bottom, ta1_y_bottom));
- float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_bottom, ta1_y_bottom));
- float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u);
- float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v);
-#else /* defined(__aarch64__) */
- const auto ta_y_top = vld2q_u8(y_top_ptr);
- const auto ta_y_bottom = vld2q_u8(y_bottom_ptr);
- const auto ta_u = vld1q_u8(u_ptr);
- const auto ta_v = vld1q_u8(v_ptr);
- //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
- //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
- //ta_u.val[0] = U0 U2 U4 U6 ...
- //ta_v.val[0] = V0 V2 V4 V6 ...
-
- // Convert the uint8x16_t vectors to float32x4x4_t
- float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
- float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
- float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
- float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
- float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u);
- float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v);
-#endif /* defined(__aarch64__) */
-
- yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
-
- yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
- yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
- },
- in_y, in_u, in_v, out);
-}
-
-/** Convert YUYV to NV12.
- *
- * @param[in] input Input YUYV data buffer.
- * @param[out] output Output NV12 buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool yuyv>
-void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- constexpr auto shift = yuyv ? 0 : 1;
-
- // NV12's UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in(input_ptr, win);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_uv(output_ptr->plane(1), win_uv);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_top = vld4q_u8(in.ptr());
- const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
- //ta.val[0] = Y0 Y2 Y4 Y6 ...
- //ta.val[1] = U0 U2 U4 U6 ...
- //ta.val[2] = Y1 Y3 Y5 Y7 ...
- //ta.val[3] = V0 V2 V4 V6 ...
-
- uint8x16x2_t yvec;
- yvec.val[0] = ta_top.val[0 + shift];
- yvec.val[1] = ta_top.val[2 + shift];
- vst2q_u8(out_y.ptr(), yvec);
-
- uint8x16x2_t yyvec;
- yyvec.val[0] = ta_bottom.val[0 + shift];
- yyvec.val[1] = ta_bottom.val[2 + shift];
- vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
-
- uint8x16x2_t uvvec;
- uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
- uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
- vst2q_u8(out_uv.ptr(), uvvec);
- },
- in, out_y, out_uv);
-}
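YUYV input is already subsampled horizontally, so only the two rows' chroma needs merging here; vhaddq_u8 is the truncating average (top + bottom) >> 1, lane by lane. Scalar form:

#include <cstdint>

inline uint8_t halving_average(uint8_t top, uint8_t bottom)
{
    return static_cast<uint8_t>((static_cast<unsigned>(top) + bottom) >> 1); // e.g. (100, 101) -> 100
}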
-
-/** Convert IYUV to NV12.
- *
- * @param[in] input Input IYUV data buffer.
- * @param[out] output Output NV12 buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in_y(input_ptr->plane(0), win);
- Iterator in_u(input_ptr->plane(1), win_uv);
- Iterator in_v(input_ptr->plane(2), win_uv);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_uv(output_ptr->plane(1), win_uv);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_y_top = vld2q_u8(in_y.ptr());
- const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
- uint8x16x2_t ta_uv;
- ta_uv.val[0] = vld1q_u8(in_u.ptr());
- ta_uv.val[1] = vld1q_u8(in_v.ptr());
- //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
- //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
- //ta_uv.val[0] = U0 U2 U4 U6 ...
- //ta_uv.val[1] = V0 V2 V4 V6 ...
-
- vst2q_u8(out_y.ptr(), ta_y_top);
- vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
- vst2q_u8(out_uv.ptr(), ta_uv);
- },
- in_y, in_u, in_v, out_y, out_uv);
-}
-
-/** Convert NV12 to IYUV.
- *
- * @param[in] input Input NV12 data buffer.
- * @param[out] output Output IYUV buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool uv>
-void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- constexpr auto shift = uv ? 0 : 1;
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in_y(input_ptr->plane(0), win);
- Iterator in_uv(input_ptr->plane(1), win_uv);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_u(output_ptr->plane(1), win_uv);
- Iterator out_v(output_ptr->plane(2), win_uv);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_y_top = vld2q_u8(in_y.ptr());
- const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
- const auto ta_uv = vld2q_u8(in_uv.ptr());
- //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
- //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
- //ta_uv.val[0] = U0 U2 U4 U6 ...
- //ta_uv.val[1] = V0 V2 V4 V6 ...
-
- vst2q_u8(out_y.ptr(), ta_y_top);
- vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
- vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
- vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
- },
- in_y, in_uv, out_y, out_u, out_v);
-}
-
-/** Convert YUYV to IYUV.
- *
- * @param[in] input Input YUYV data buffer.
- * @param[out] output Output IYUV buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool yuyv>
-void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- constexpr auto shift = yuyv ? 0 : 1;
-
- // Destination's UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in(input_ptr, win);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_u(output_ptr->plane(1), win_uv);
- Iterator out_v(output_ptr->plane(2), win_uv);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_top = vld4q_u8(in.ptr());
- const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
- //ta.val[0] = Y0 Y2 Y4 Y6 ...
- //ta.val[1] = U0 U2 U4 U6 ...
- //ta.val[2] = Y1 Y3 Y5 Y7 ...
- //ta.val[3] = V0 V2 V4 V6 ...
-
- uint8x16x2_t yvec;
- yvec.val[0] = ta_top.val[0 + shift];
- yvec.val[1] = ta_top.val[2 + shift];
- vst2q_u8(out_y.ptr(), yvec);
-
- uint8x16x2_t yyvec;
- yyvec.val[0] = ta_bottom.val[0 + shift];
- yyvec.val[1] = ta_bottom.val[2 + shift];
- vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
-
- uint8x16_t uvec;
- uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
- vst1q_u8(out_u.ptr(), uvec);
-
- uint8x16_t vvec;
- vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
- vst1q_u8(out_v.ptr(), vvec);
- },
- in, out_y, out_u, out_v);
-}
-
-/** Convert NV12 to YUV4.
- *
- * @param[in] input Input NV12 data buffer.
- * @param[out] output Output YUV4 buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool uv>
-void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- constexpr auto shift = uv ? 0 : 1;
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in_y(input_ptr->plane(0), win);
- Iterator in_uv(input_ptr->plane(1), win_uv);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_u(output_ptr->plane(1), win);
- Iterator out_v(output_ptr->plane(2), win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_y_top = vld2q_u8(in_y.ptr());
- const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
- const auto ta_uv = vld2q_u8(in_uv.ptr());
- //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
- //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
- //ta_uv.val[0] = U0 U2 U4 U6 ...
- //ta_uv.val[1] = V0 V2 V4 V6 ...
-
- vst2q_u8(out_y.ptr(), ta_y_top);
- vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
-
- uint8x16x2_t uvec;
- uvec.val[0] = ta_uv.val[0 + shift];
- uvec.val[1] = ta_uv.val[0 + shift];
- vst2q_u8(out_u.ptr(), uvec);
- vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
-
- uint8x16x2_t vvec;
- vvec.val[0] = ta_uv.val[1 - shift];
- vvec.val[1] = ta_uv.val[1 - shift];
- vst2q_u8(out_v.ptr(), vvec);
- vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
- },
- in_y, in_uv, out_y, out_u, out_v);
-}
-
-/** Convert IYUV to YUV4.
- *
- * @param[in] input Input IYUV data buffer.
- * @param[out] output Output YUV4 buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in_y(input_ptr->plane(0), win);
- Iterator in_u(input_ptr->plane(1), win_uv);
- Iterator in_v(input_ptr->plane(2), win_uv);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_u(output_ptr->plane(1), win);
- Iterator out_v(output_ptr->plane(2), win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_y_top = vld2q_u8(in_y.ptr());
- const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
- const auto ta_u = vld1q_u8(in_u.ptr());
- const auto ta_v = vld1q_u8(in_v.ptr());
- //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
- //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
- //ta_u = U0 U2 U4 U6 ...
- //ta_v = V0 V2 V4 V6 ...
-
- vst2q_u8(out_y.ptr(), ta_y_top);
- vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
-
- uint8x16x2_t uvec;
- uvec.val[0] = ta_u;
- uvec.val[1] = ta_u;
- vst2q_u8(out_u.ptr(), uvec);
- vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
-
- uint8x16x2_t vvec;
- vvec.val[0] = ta_v;
- vvec.val[1] = ta_v;
- vst2q_u8(out_v.ptr(), vvec);
- vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
- },
- in_y, in_u, in_v, out_y, out_u, out_v);
-}
-
-/** Convert RGB to NV12.
- *
- * @param[in] input Input RGB data buffer.
- * @param[out] output Output NV12 buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool alpha>
-void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in(input_ptr, win);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_uv(output_ptr->plane(1), win_uv);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
- const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
- //ta_rgb.val[0] = R0 R1 R2 R3 ...
- //ta_rgb.val[1] = G0 G1 G2 G3 ...
- //ta_rgb.val[2] = B0 B1 B2 B3 ...
-
- store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
- ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
- out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
- out_uv.ptr());
- },
- in, out_y, out_uv);
-}
-
-/** Convert RGB to IYUV.
- *
- * @param[in] input Input RGB data buffer.
- * @param[out] output Output IYUV buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool alpha>
-void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- // UV's width and height are subsampled
- Window win_uv(win);
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- win_uv.validate();
-
- Iterator in(input_ptr, win);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_u(output_ptr->plane(1), win_uv);
- Iterator out_v(output_ptr->plane(2), win_uv);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
- const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
- //ta_rgb.val[0] = R0 R1 R2 R3 ...
- //ta_rgb.val[1] = G0 G1 G2 G3 ...
- //ta_rgb.val[2] = B0 B1 B2 B3 ...
-
- store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
- ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
- out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
- out_u.ptr(), out_v.ptr());
- },
- in, out_y, out_u, out_v);
-}
-
-/** Convert RGB to YUV4.
- *
- * @param[in] input Input RGB data buffer.
- * @param[out] output Output YUV4 buffer.
- * @param[in] win Window for iterating the buffers.
- *
- */
-template <bool alpha>
-void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- win.validate();
-
- const auto input_ptr = static_cast<const IImage *__restrict>(input);
- const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
-
- Iterator in(input_ptr, win);
- Iterator out_y(output_ptr->plane(0), win);
- Iterator out_u(output_ptr->plane(1), win);
- Iterator out_v(output_ptr->plane(2), win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto ta_rgb = load_rgb(in.ptr(), alpha);
- //ta_rgb.val[0] = R0 R1 R2 R3 ...
- //ta_rgb.val[1] = G0 G1 G2 G3 ...
- //ta_rgb.val[2] = B0 B1 B2 B3 ...
-
- store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
- out_y.ptr(), out_u.ptr(), out_v.ptr());
- },
- in, out_y, out_u, out_v);
-}
-} // namespace arm_compute
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
deleted file mode 100644
index 14e51d825c..0000000000
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <array>
-#include <limits>
-
-namespace arm_compute
-{
-#ifndef DOXYGEN_SKIP_THIS
-
-inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b)
-{
- float32x4x2_t res =
- {
- {
- vmaxq_f32(a.val[0], b.val[0]),
- vmaxq_f32(a.val[1], b.val[1])
- }
- };
- return res;
-}
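A minimal usage sketch for vmax2q_f32, e.g. clamping eight floats from below (assumes arm_compute/core/NEON/NEFixedPoint.h is on the include path):

#include <arm_neon.h>
#include "arm_compute/core/NEON/NEFixedPoint.h"

float32x4x2_t clamp_below(float32x4x2_t x, float lo)
{
    const float32x4x2_t vlo = { { vdupq_n_f32(lo), vdupq_n_f32(lo) } };
    return arm_compute::vmax2q_f32(x, vlo);
}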
-#endif /* DOXYGEN_SKIP_THIS */
-} // namespace arm_compute
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
deleted file mode 100644
index 38701f434a..0000000000
--- a/arm_compute/core/NEON/NEKernels.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEKERNELS_H
-#define ARM_COMPUTE_NEKERNELS_H
-
-/* Header grouping together all the NEON kernels */
-#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
-#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h"
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h"
-#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
-#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NECropKernel.h"
-#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h"
-#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFloorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGatherKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
-#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
-#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h"
-#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NERangeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
-#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReverseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESelectKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
-#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
-#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h"
-#include "arm_compute/core/NEON/kernels/NETileKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
-
-#endif /* ARM_COMPUTE_NEKERNELS_H */
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
deleted file mode 100644
index 8827bbf459..0000000000
--- a/arm_compute/core/NEON/NEMath.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMATH_H
-#define ARM_COMPUTE_NEMATH_H
-
-#include <arm_neon.h>
-#include <array>
-
-namespace arm_compute
-{
-/** Calculate floor of a vector.
- *
- * @param[in] val Input vector value in F32 format.
- *
- * @return The calculated floor vector.
- */
-float32x4_t vfloorq_f32(float32x4_t val);
-
-/** Round each element of a vector to the nearest integer, with ties to even.
- *
- * @param[in] val Input vector value in F32 format.
- *
- * @return The calculated round vector.
- */
-float32x4_t vroundq_rte_f32(float32x4_t val);
-
-/** Calculate inverse square root.
- *
- * @param[in] x Input value.
- *
- * @return The calculated inverse square root.
- */
-float32x2_t vinvsqrt_f32(float32x2_t x);
-
-/** Calculate inverse square root.
- *
- * @param[in] x Input value.
- *
- * @return The calculated inverse square root.
- */
-float32x4_t vinvsqrtq_f32(float32x4_t x);
-
-/** Calculate reciprocal.
- *
- * @param[in] x Input value.
- *
- * @return The calculated reciprocal.
- */
-float32x2_t vinv_f32(float32x2_t x);
-
-/** Calculate reciprocal.
- *
- * @param[in] x Input value.
- *
- * @return The calculated reciprocal.
- */
-float32x4_t vinvq_f32(float32x4_t x);
-
-/** Perform a 7th degree polynomial approximation using Estrin's method.
- *
- * @param[in] x Input vector value in F32 format.
- * @param[in] coeffs Polynomial coefficients table.
- *
- * @return The calculated approximation.
- */
-float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs);
-
-/** Calculate the exponential of a vector.
- *
- * @param[in] x Input vector value in F32 format.
- *
- * @return The calculated exponential.
- */
-float32x4_t vexpq_f32(float32x4_t x);
-
-/** Calculate the natural logarithm of a vector.
- *
- * @param[in] x Input vector value in F32 format.
- *
- * @return The calculated logarithm.
- */
-float32x4_t vlogq_f32(float32x4_t x);
-
-/** Calculate hyperbolic tangent.
- *
- * tanh(x) = (e^2x - 1)/(e^2x + 1)
- *
- * @note The input is clamped to [-10, 10] to avoid overflow in the exponential.
- *
- * @param[in] val Input vector value in F32 format.
- *
- * @return The calculated Hyperbolic Tangent.
- */
-float32x4_t vtanhq_f32(float32x4_t val);
-
-/** Calculate the n-th power of a number.
- *
- * pow(x,n) = e^(n*log(x))
- *
- * @param[in] val Input vector value in F32 format.
- * @param[in] n   Power to raise the input to.
- *
- * @return The calculated power.
- */
-float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
-
-/** Round-to-nearest division by a power of two, using the given exponent
- *
- * @note This function computes (x + 2^(n-1)) / 2^n, i.e. division by 2^n rounded to the nearest integer, where n = exponent
- *
- * @param[in] x        Vector of 4 elements
- * @param[in] exponent Vector of 4 integer values giving the power-of-two divisor of each lane
- *
- * @return The result of the division, rounded to the nearest integer
- */
-int32x4_t rounding_divide_by_pow2(int32x4_t x, int32x4_t exponent);
-
-/** Round-to-nearest division by a power of two, using the given exponent
- *
- * @note This function computes (x + 2^(n-1)) / 2^n, i.e. division by 2^n rounded to the nearest integer, where n = exponent
- *
- * @param[in] x        Vector of 4 elements
- * @param[in] exponent Integer value giving the power-of-two divisor
- *
- * @return The result of the division, rounded to the nearest integer
- */
-int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent);
-
-/** Round-to-nearest division by a power of two, using the given exponent
- *
- * @note This function computes (x + 2^(n-1)) / 2^n, i.e. division by 2^n rounded to the nearest integer, where n = exponent
- *
- * @param[in] x        Element to divide.
- * @param[in] exponent Integer value giving the power-of-two divisor
- *
- * @return The result of the division, rounded to the nearest integer
- */
-int32_t rounding_divide_by_pow2(int32_t x, int exponent);
-
-/** Converts from uint8x16_t to float32x4x4_t
- *
- * @param[in] in Vector of uint8 to be converted
- *
- * @return Converted vector of float
- */
-float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in);
-
-/** Converts from int8x16_t to float32x4x4_t
- *
- * @param[in] in Vector of int8 to be converted
- *
- * @return Converted vector of float
- */
-float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in);
-
-/** Converts a templated 16-element vector to float32x4x4_t
- *
- * @param[in] in Vector to be converted
- *
- * @return Converted vector of float
- */
-template <typename T>
-float32x4x4_t convert_to_float32x4x4(const T &in);
-
-/** Converts from two float32x4x3_t to just one uint8x8x3_t
- *
- * @param[in] in1 First input vector of float to be converted
- * @param[in] in2 Second input vector of float to be converted
- * @param[out] out Converted output vector uint8 to store the result
- */
-void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out);
-
-/** Converts from one float32x4x4_t to one uint8x16_t
- *
- * @param[in] in Vector of float to be converted
- * @param[out] out Converted vector of uint8 to store the result
- */
-void convert_float32x4x4_to_uint8x16(const float32x4x4_t &in, uint8x16_t &out);
-
-/** Converts from float32x4x4_t to just one int8x16_t
- *
- * @param[in] in Vector of float to be converted
- * @param[out] out Converted vector of int8 to store the result
- */
-void convert_float32x4x4_to_int8x16(const float32x4x4_t &in, int8x16_t &out);
-
-/** Calculate sine.
- *
- * @param[in] val Input vector value in radians, F32 format.
- *
- * @return The calculated sine.
- */
-float32x4_t vsinq_f32(float32x4_t val);
-
-/** Calculate sine.
- *
- * @param[in] val Input vector value in radians, F32 format.
- *
- * @return The calculated sine.
- */
-float32x2_t vsin_f32(float32x2_t val);
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** Calculate hyperbolic tangent.
- *
- * tanh(x) = (e^2x - 1)/(e^2x + 1)
- *
- * @note The input is clamped to [-10, 10] to avoid overflow in the exponential.
- *
- * @param[in] val Input vector value in F16 format.
- *
- * @return The calculated Hyperbolic Tangent.
- */
-float16x8_t vtanhq_f16(float16x8_t val);
-
-/** Round each element of a vector to the nearest integer, with ties to even.
- *
- * @param[in] val Input vector value in F16 format.
- *
- * @return The calculated round vector.
- */
-float16x8_t vroundq_rte_f16(float16x8_t val);
-
-/** Calculate reciprocal.
- *
- * @param[in] x Input value.
- *
- * @return The calculated reciprocal.
- */
-float16x4_t vinv_f16(float16x4_t x);
-
-/** Calculate reciprocal.
- *
- * @param[in] x Input value.
- *
- * @return The calculated reciprocal.
- */
-float16x8_t vinvq_f16(float16x8_t x);
-
-/** Calculate inverse square root.
- *
- * @param[in] x Input value.
- *
- * @return The calculated inverse square root.
- */
-float16x4_t vinvsqrt_f16(float16x4_t x);
-
-/** Calculate inverse square root.
- *
- * @param[in] x Input value.
- *
- * @return The calculated inverse square root.
- */
-float16x8_t vinvsqrtq_f16(float16x8_t x);
-
-/** Calculate the exponential of a vector.
- *
- * @param[in] x Input vector value in F16 format.
- *
- * @return The calculated exponential.
- */
-float16x8_t vexpq_f16(float16x8_t x);
-
-/** Calculate the n-th power of a number.
- *
- * pow(x,n) = e^(n*log(x))
- *
- * @param[in] val Input vector value in F16 format.
- * @param[in] n   Power to raise the input to.
- *
- * @return The calculated power.
- */
-float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
-
-/** Calculate sine.
- *
- * @param[in] val Input vector value in radians, F16 format.
- *
- * @return The calculated sine.
- */
-float16x8_t vsinq_f16(float16x8_t val);
-
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-} // namespace arm_compute
-#include "arm_compute/core/NEON/NEMath.inl"
-#endif /* ARM_COMPUTE_NEMATH_H */
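
The tanh declarations above document the identity tanh(x) = (e^(2x) - 1)/(e^(2x) + 1) with a clamped input. A minimal scalar sketch of that identity; the [-10, 10] bound is the one used by the implementation in NEMath.inl below:

    #include <algorithm>
    #include <cmath>

    // Scalar model of the clamped tanh identity behind vtanhq_f32.
    float tanh_clamped(float x)
    {
        x = std::min(std::max(x, -10.f), 10.f); // clamp to avoid exp overflow
        const float e2x = std::exp(2.f * x);
        return (e2x - 1.f) / (e2x + 1.f);
    }
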
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
deleted file mode 100644
index 032bfde238..0000000000
--- a/arm_compute/core/NEON/NEMath.inl
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <cmath>
-#include <limits>
-
-#ifndef M_PI
-#define M_PI (3.14159265358979323846)
-#endif // M_PI
-
-namespace arm_compute
-{
-/** Exponent polynomial coefficients */
-const std::array<float32x4_t, 8> exp_tab =
-{
- {
- vdupq_n_f32(1.f),
- vdupq_n_f32(0.0416598916054f),
- vdupq_n_f32(0.500000596046f),
- vdupq_n_f32(0.0014122662833f),
- vdupq_n_f32(1.00000011921f),
- vdupq_n_f32(0.00833693705499f),
- vdupq_n_f32(0.166665703058f),
- vdupq_n_f32(0.000195780929062f),
- }
-};
-
-/** Logarithm polynomial coefficients */
-const std::array<float32x4_t, 8> log_tab =
-{
- {
- vdupq_n_f32(-2.29561495781f),
- vdupq_n_f32(-2.47071170807f),
- vdupq_n_f32(-5.68692588806f),
- vdupq_n_f32(-0.165253549814f),
- vdupq_n_f32(5.17591238022f),
- vdupq_n_f32(0.844007015228f),
- vdupq_n_f32(4.58445882797f),
- vdupq_n_f32(0.0141278216615f),
- }
-};
-
-/** Sin polynomial coefficients */
-constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3)
-constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5)
-constexpr float te_sin_coeff4 = 0.023809523810f; // 1/(6*7)
-constexpr float te_sin_coeff5 = 0.013888888889f; // 1/(8*9)
-
-#ifndef DOXYGEN_SKIP_THIS
-inline float32x4_t vfloorq_f32(float32x4_t val)
-{
- static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
-
- const int32x4_t z = vcvtq_s32_f32(val);
- const float32x4_t r = vcvtq_f32_s32(z);
-
- return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, CONST_1), r);
-}
-
-inline float32x4_t vroundq_rte_f32(float32x4_t val)
-{
-#ifdef __aarch64__
- return vrndnq_f32(val);
-#else // __aarch64__
- static const float32x4_t CONST_HALF_FLOAT = vdupq_n_f32(0.5f);
- static const float32x4_t CONST_1_FLOAT = vdupq_n_f32(1.f);
- static const int32x4_t CONST_1_INT = vdupq_n_s32(1);
- const float32x4_t floor_val = vfloorq_f32(val);
- const float32x4_t diff = vsubq_f32(val, floor_val);
-
- /*
- * Select the floor value when (diff < 0.5 || (diff == 0.5 && floor_val % 2 == 0)).
- * This condition is checked by vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT) ,vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT) , vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT),CONST_1_INT))))
- */
-
- return vbslq_f32(vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT), vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT), vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT), CONST_1_INT)))),
- floor_val, vaddq_f32(floor_val, CONST_1_FLOAT));
-#endif // __aarch64__
-}
-
-inline float32x2_t vinvsqrt_f32(float32x2_t x)
-{
- float32x2_t sqrt_reciprocal = vrsqrte_f32(x);
- sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
- sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
-
- return sqrt_reciprocal;
-}
-
-inline float32x4_t vinvsqrtq_f32(float32x4_t x)
-{
- float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
- sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
- sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
-
- return sqrt_reciprocal;
-}
-
-inline float32x2_t vinv_f32(float32x2_t x)
-{
- float32x2_t recip = vrecpe_f32(x);
- recip = vmul_f32(vrecps_f32(x, recip), recip);
- recip = vmul_f32(vrecps_f32(x, recip), recip);
- return recip;
-}
-
-inline float32x4_t vinvq_f32(float32x4_t x)
-{
- float32x4_t recip = vrecpeq_f32(x);
- recip = vmulq_f32(vrecpsq_f32(x, recip), recip);
- recip = vmulq_f32(vrecpsq_f32(x, recip), recip);
- return recip;
-}
-
-inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs)
-{
- float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x);
- float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x);
- float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x);
- float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x);
- float32x4_t x2 = vmulq_f32(x, x);
- float32x4_t x4 = vmulq_f32(x2, x2);
- float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4);
- return res;
-}
-
-inline float32x4_t vexpq_f32(float32x4_t x)
-{
- static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2)
- static const float32x4_t CONST_INV_LN2 = vdupq_n_f32(1.4426950408f); // 1/ln(2)
- static const float32x4_t CONST_INF = vdupq_n_f32(std::numeric_limits<float>::infinity());
- static const float32x4_t CONST_MAX_INPUT = vdupq_n_f32(88.7f);
- static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
- static const int32x4_t CONST_NEGATIVE_126 = vdupq_n_s32(-126);
-
- // Perform range reduction [-log(2),log(2)]
- int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2));
- float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2);
-
- // Polynomial Approximation
- float32x4_t poly = vtaylor_polyq_f32(val, exp_tab);
-
- // Reconstruct
- poly = vreinterpretq_f32_s32(vqaddq_s32(vreinterpretq_s32_f32(poly), vqshlq_n_s32(m, 23)));
- poly = vbslq_f32(vcltq_s32(m, CONST_NEGATIVE_126), CONST_0, poly); // Handle underflow
- poly = vbslq_f32(vcgtq_f32(x, CONST_MAX_INPUT), CONST_INF, poly); // Handle overflow
-
- return poly;
-}
-
-inline float32x4_t vlogq_f32(float32x4_t x)
-{
- static const int32x4_t CONST_127 = vdupq_n_s32(127); // 127
- static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2)
-
- // Extract exponent
- int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127);
- float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23)));
-
- // Polynomial Approximation
- float32x4_t poly = vtaylor_polyq_f32(val, log_tab);
-
- // Reconstruct
- poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2);
-
- return poly;
-}
-
-inline float32x4_t vtanhq_f32(float32x4_t val)
-{
- static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
- static const float32x4_t CONST_2 = vdupq_n_f32(2.f);
- static const float32x4_t CONST_MIN_TANH = vdupq_n_f32(-10.f);
- static const float32x4_t CONST_MAX_TANH = vdupq_n_f32(10.f);
-
- float32x4_t x = vminq_f32(vmaxq_f32(val, CONST_MIN_TANH), CONST_MAX_TANH);
- float32x4_t exp2x = vexpq_f32(vmulq_f32(CONST_2, x));
- float32x4_t num = vsubq_f32(exp2x, CONST_1);
- float32x4_t den = vaddq_f32(exp2x, CONST_1);
- float32x4_t tanh = vmulq_f32(num, vinvq_f32(den));
- return tanh;
-}
-
-inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
-{
- return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
-}
-
-inline float32x4_t vsinq_f32(float32x4_t val)
-{
- const float32x4_t pi_v = vdupq_n_f32(M_PI);
- const float32x4_t pio2_v = vdupq_n_f32(M_PI / 2);
- const float32x4_t ipi_v = vdupq_n_f32(1 / M_PI);
-
- //Find positive or negative
- const int32x4_t c_v = vabsq_s32(vcvtq_s32_f32(vmulq_f32(val, ipi_v)));
- const uint32x4_t sign_v = vcleq_f32(val, vdupq_n_f32(0));
- const uint32x4_t odd_v = vandq_u32(vreinterpretq_u32_s32(c_v), vdupq_n_u32(1));
-
- uint32x4_t neg_v = veorq_u32(odd_v, sign_v);
-
- //Modulus a - (n * int(a*(1/n)))
- float32x4_t ma = vsubq_f32(vabsq_f32(val), vmulq_f32(pi_v, vcvtq_f32_s32(c_v)));
- const uint32x4_t reb_v = vcgeq_f32(ma, pio2_v);
-
- //Rebase a between 0 and pi/2
- ma = vbslq_f32(reb_v, vsubq_f32(pi_v, ma), ma);
-
- //Taylor series
- const float32x4_t ma2 = vmulq_f32(ma, ma);
-
- //2nd elem: x^3 / 3!
- float32x4_t elem = vmulq_f32(vmulq_f32(ma, ma2), vdupq_n_f32(te_sin_coeff2));
- float32x4_t res = vsubq_f32(ma, elem);
-
- //3rd elem: x^5 / 5!
- elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff3));
- res = vaddq_f32(res, elem);
-
- //4th elem: x^7 / 7!
- elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff4));
- res = vsubq_f32(res, elem);
-
- //5th elem: x^9 / 9!
- elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff5));
- res = vaddq_f32(res, elem);
-
- //Change of sign
- neg_v = vshlq_n_u32(neg_v, 31);
- res = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(res), neg_v));
- return res;
-}
-
-inline float32x2_t vsin_f32(float32x2_t val)
-{
- const float32x2_t pi_v = vdup_n_f32(M_PI);
- const float32x2_t pio2_v = vdup_n_f32(M_PI / 2);
- const float32x2_t ipi_v = vdup_n_f32(1 / M_PI);
-
- //Find positive or negative
- const int32x2_t c_v = vabs_s32(vcvt_s32_f32(vmul_f32(val, ipi_v)));
- const uint32x2_t sign_v = vcle_f32(val, vdup_n_f32(0));
- const uint32x2_t odd_v = vand_u32(vreinterpret_u32_s32(c_v), vdup_n_u32(1));
-
- uint32x2_t neg_v = veor_u32(odd_v, sign_v);
-
- //Modulus a - (n * int(a*(1/n)))
- float32x2_t ma = vsub_f32(vabs_f32(val), vmul_f32(pi_v, vcvt_f32_s32(c_v)));
- const uint32x2_t reb_v = vcge_f32(ma, pio2_v);
-
- //Rebase a between 0 and pi/2
- ma = vbsl_f32(reb_v, vsub_f32(pi_v, ma), ma);
-
- //Taylor series
- const float32x2_t ma2 = vmul_f32(ma, ma);
-
- //2nd elem: x^3 / 3!
- float32x2_t elem = vmul_f32(vmul_f32(ma, ma2), vdup_n_f32(te_sin_coeff2));
- float32x2_t res = vsub_f32(ma, elem);
-
- //3rd elem: x^5 / 5!
- elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff3));
- res = vadd_f32(res, elem);
-
- //4th elem: x^7 / 7!
- elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff4));
- res = vsub_f32(res, elem);
-
- //5th elem: x^9 / 9!
- elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff5));
- res = vadd_f32(res, elem);
-
- //Change of sign
- neg_v = vshl_n_u32(neg_v, 31);
- res = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(res), neg_v));
- return res;
-}
-
-#endif /* DOXYGEN_SKIP_THIS */
-
-inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int32x4_t exponent)
-{
- const int32x4_t shift_vec = vnegq_s32(exponent);
- const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31);
- const int32x4_t fixed_up_x = vqaddq_s32(x, fixup);
- return vrshlq_s32(fixed_up_x, shift_vec);
-}
-
-inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent)
-{
- const int32x4_t shift_vec = vdupq_n_s32(-exponent);
- const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31);
- const int32x4_t fixed_up_x = vqaddq_s32(x, fixup);
- return vrshlq_s32(fixed_up_x, shift_vec);
-}
-
-inline int32_t rounding_divide_by_pow2(int32_t x, int exponent)
-{
- const int32_t mask = (1 << exponent) - 1;
- const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
- return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
-}
-
-inline float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in)
-{
- float32x4x4_t out;
-
- const auto tmp1 = vmovl_u8(vget_low_u8(in));
- out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1)));
- out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1)));
-
- const auto tmp2 = vmovl_u8(vget_high_u8(in));
- out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2)));
- out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2)));
- return out;
-}
-
-inline float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in)
-{
- float32x4x4_t out;
-
- const auto tmp1 = vmovl_s8(vget_low_s8(in));
- out.val[0] = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp1)));
- out.val[1] = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp1)));
-
- const auto tmp2 = vmovl_s8(vget_high_s8(in));
- out.val[2] = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp2)));
- out.val[3] = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp2)));
- return out;
-}
-
-template <>
-inline float32x4x4_t convert_to_float32x4x4(const uint8x16_t &in)
-{
- return convert_uint8x16_to_float32x4x4(in);
-}
-
-template <>
-inline float32x4x4_t convert_to_float32x4x4(const int8x16_t &in)
-{
- return convert_int8x16_to_float32x4x4(in);
-}
-
-inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
-{
- out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
- vqmovn_u32(vcvtq_u32_f32(in2.val[0]))));
- out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])),
- vqmovn_u32(vcvtq_u32_f32(in2.val[1]))));
- out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])),
- vqmovn_u32(vcvtq_u32_f32(in2.val[2]))));
-}
-
-inline void convert_float32x4x4_to_uint8x16(const float32x4x4_t &in, uint8x16_t &out)
-{
- const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])),
- vqmovn_u32(vcvtq_u32_f32(in.val[1])));
- const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])),
- vqmovn_u32(vcvtq_u32_f32(in.val[3])));
- out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high));
-}
-
-inline void convert_float32x4x4_to_int8x16(const float32x4x4_t &in, int8x16_t &out)
-{
- const auto low = vcombine_s16(vqmovn_s32(vcvtq_s32_f32(in.val[0])),
- vqmovn_s32(vcvtq_s32_f32(in.val[1])));
- const auto high = vcombine_s16(vqmovn_s32(vcvtq_s32_f32(in.val[2])),
- vqmovn_s32(vcvtq_s32_f32(in.val[3])));
- out = vcombine_s8(vqmovn_s16(low), vqmovn_s16(high));
-}
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** Exponent polynomial coefficients */
-/** Logarithm polynomial coefficients */
-#ifndef DOXYGEN_SKIP_THIS
-inline float16x8_t vfloorq_f16(float16x8_t val)
-{
- static const float16x8_t CONST_1 = vdupq_n_f16(1.f);
-
- const int16x8_t z = vcvtq_s16_f16(val);
- const float16x8_t r = vcvtq_f16_s16(z);
-
- return vbslq_f16(vcgtq_f16(r, val), vsubq_f16(r, CONST_1), r);
-}
-
-inline float16x8_t vroundq_rte_f16(float16x8_t val)
-{
- return vrndnq_f16(val);
-}
-
-inline float16x4_t vinvsqrt_f16(float16x4_t x)
-{
- float16x4_t sqrt_reciprocal = vrsqrte_f16(x);
- sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
- sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
- return sqrt_reciprocal;
-}
-
-inline float16x8_t vinvsqrtq_f16(float16x8_t x)
-{
- float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
- sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
- sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
- return sqrt_reciprocal;
-}
-
-inline float16x4_t vinv_f16(float16x4_t x)
-{
- float16x4_t recip = vrecpe_f16(x);
- recip = vmul_f16(vrecps_f16(x, recip), recip);
- recip = vmul_f16(vrecps_f16(x, recip), recip);
- return recip;
-}
-
-inline float16x8_t vinvq_f16(float16x8_t x)
-{
- float16x8_t recip = vrecpeq_f16(x);
- recip = vmulq_f16(vrecpsq_f16(x, recip), recip);
- recip = vmulq_f16(vrecpsq_f16(x, recip), recip);
- return recip;
-}
-
-inline float16x8_t vtanhq_f16(float16x8_t val)
-{
- const float16x8_t CONST_1 = vdupq_n_f16(1.f);
- const float16x8_t CONST_2 = vdupq_n_f16(2.f);
- const float16x8_t CONST_MIN_TANH = vdupq_n_f16(-10.f);
- const float16x8_t CONST_MAX_TANH = vdupq_n_f16(10.f);
-
- const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH);
- const float16x8_t exp2x = vexpq_f16(vmulq_f16(CONST_2, x));
- const float16x8_t num = vsubq_f16(exp2x, CONST_1);
- const float16x8_t den = vaddq_f16(exp2x, CONST_1);
- const float16x8_t tanh = vmulq_f16(num, vinvq_f16(den));
- return tanh;
-}
-
-inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const std::array<float16x8_t, 8> &coeffs)
-{
- const float16x8_t A = vaddq_f16(coeffs[0], vmulq_f16(coeffs[4], x));
- const float16x8_t B = vaddq_f16(coeffs[2], vmulq_f16(coeffs[6], x));
- const float16x8_t C = vaddq_f16(coeffs[1], vmulq_f16(coeffs[5], x));
- const float16x8_t D = vaddq_f16(coeffs[3], vmulq_f16(coeffs[7], x));
- const float16x8_t x2 = vmulq_f16(x, x);
- const float16x8_t x4 = vmulq_f16(x2, x2);
- const float16x8_t res = vaddq_f16(vaddq_f16(A, vmulq_f16(B, x2)), vmulq_f16(vaddq_f16(C, vmulq_f16(D, x2)), x4));
- return res;
-}
-
-inline float16x8_t vexpq_f16(float16x8_t x)
-{
- // TODO (COMPMID-1535) : Revisit FP16 approximations
- const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
- const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x));
-
- const float16x8_t res = vcombine_f16(vcvt_f16_f32(vexpq_f32(x_low)), vcvt_f16_f32(vexpq_f32(x_high)));
- return res;
-}
-
-inline float16x8_t vlogq_f16(float16x8_t x)
-{
- // TODO (COMPMID-1535) : Revisit FP16 approximations
- const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
- const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x));
-
- const float16x8_t res = vcombine_f16(vcvt_f16_f32(vlogq_f32(x_low)), vcvt_f16_f32(vlogq_f32(x_high)));
- return res;
-}
-
-inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
-{
- // TODO (giaiod01) - COMPMID-1535
- float32x4_t n0_f32 = vcvt_f32_f16(vget_low_f16(n));
- float32x4_t n1_f32 = vcvt_f32_f16(vget_high_f16(n));
- float32x4_t val0_f32 = vcvt_f32_f16(vget_low_f16(val));
- float32x4_t val1_f32 = vcvt_f32_f16(vget_high_f16(val));
-
- float32x4_t res0_f32 = vexpq_f32(vmulq_f32(n0_f32, vlogq_f32(val0_f32)));
- float32x4_t res1_f32 = vexpq_f32(vmulq_f32(n1_f32, vlogq_f32(val1_f32)));
-
- return vcombine_f16(vcvt_f16_f32(res0_f32), vcvt_f16_f32(res1_f32));
-}
-
-inline float16x8_t vsinq_f16(float16x8_t val)
-{
- const float32x4_t val_high = vcvt_f32_f16(vget_high_f16(val));
- const float32x4_t val_low = vcvt_f32_f16(vget_low_f16(val));
-
- const float32x4_t res_high = vsinq_f32(val_high);
- const float32x4_t res_low = vsinq_f32(val_low);
-
- return vcombine_f16(vcvt_f16_f32(res_low), vcvt_f16_f32(res_high));
-}
-
-inline float16x4_t vsin_f16(float16x4_t val)
-{
- const float32x4_t val_f32 = vcvt_f32_f16(val);
- const float32x2_t val_high = vget_high_f32(val_f32);
- const float32x2_t val_low = vget_low_f32(val_f32);
-
- const float32x2_t res_high = vsin_f32(val_high);
- const float32x2_t res_low = vsin_f32(val_low);
-
- return vcvt_f16_f32(vcombine_f32(res_low, res_high));
-}
-
-#endif /* DOXYGEN_SKIP_THIS */
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-} // namespace arm_compute
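
The scalar rounding_divide_by_pow2 above rounds to nearest rather than truncating. A small self-checking sketch of its arithmetic, with hypothetical inputs:

    #include <cassert>
    #include <cstdint>

    // Same arithmetic as the removed scalar rounding_divide_by_pow2.
    int32_t rdiv_pow2(int32_t x, int exponent)
    {
        const int32_t mask      = (1 << exponent) - 1;
        const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
        return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
    }

    int main()
    {
        assert(rdiv_pow2(10, 2) == 3);   // 10/4 = 2.5, rounds away from zero
        assert(rdiv_pow2(9, 2) == 2);    // 9/4 = 2.25, rounds down
        assert(rdiv_pow2(-10, 2) == -3); // -10/4 = -2.5, rounds away from zero
        return 0;
    }
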
diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h
deleted file mode 100644
index d6c5a7073a..0000000000
--- a/arm_compute/core/NEON/NESymm.h
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESYMM_H
-#define ARM_COMPUTE_NESYMM_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-using qsymm8_t = int8_t; /**< 8 bit quantized symmetric scalar value */
-using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */
-
-using qsymm16x8_t = int16x8_t; /**< 16 bit quantized symmetric vector with 8 elements */
-using qsymm16x8x2_t = int16x8x2_t; /**< 16 bit quantized symmetric vector with 16 elements */
-
-/** Performs final quantization step on 8 signed 16-bit elements
- *
- * @tparam is_bounded_relu Specifies whether a fused bounded ReLU should be applied
- *
- * @param[in,out] in_s32 Input to be quantized; modified during the computation.
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter
- * @param[in] result_shift Result shift parameter
- * @param[in] min_s16 Relu lower bound
- * @param[in] max_s16 Relu upper bound
- *
- * @return Quantized values
- */
-template <bool is_bounded_relu>
-int16x8_t finalize_quantization_int16(int32x4x2_t &in_s32,
- int result_fixedpoint_multiplier,
- int32_t result_shift,
- int16x8_t min_s16,
- int16x8_t max_s16)
-{
- if(result_shift < 0)
- {
- in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << -result_shift));
- in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << -result_shift));
-
- in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
- in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
- }
- else
- {
- // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
- in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
- in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
- // Round to the nearest division by a power-of-two using result_shift
- in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift);
- in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift);
- }
-
- // Convert S32 to S16
- int16x8_t out_s16 = vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1]));
-
- if(is_bounded_relu)
- {
- out_s16 = vmaxq_s16(out_s16, min_s16);
- out_s16 = vminq_s16(out_s16, max_s16);
- }
-
- return out_s16;
-}
-
-/** Performs final quantization step on single signed 16-bit element
- *
- * @tparam is_bounded_relu Specifies whether a fused bounded ReLU should be applied
- *
- * @param[in] in_value Input to be quantized.
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter
- * @param[in] result_shift Result shift parameter
- * @param[in] min_s16 Relu lower bound
- * @param[in] max_s16 Relu upper bound
- *
- * @return Quantized values
- */
-template <bool is_bounded_relu>
-inline int16_t finalize_quantization_int16(int32_t in_value, int result_fixedpoint_multiplier,
- int32_t result_shift, int16_t min_s16, int16_t max_s16)
-{
- if(result_shift < 0)
- {
- const int64_t in_64 = static_cast<int64_t>(in_value) * (1 << (-result_shift)) * static_cast<int64_t>(result_fixedpoint_multiplier);
- in_value = static_cast<int32_t>((in_64 + (1 << 30)) >> 31);
- }
- else
- {
- // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
- const int64_t in_64 = static_cast<int64_t>(in_value) * static_cast<int64_t>(result_fixedpoint_multiplier);
- // Shift value by result_shift
- in_value = rounding_divide_by_pow2(static_cast<int32_t>((in_64 + (1 << 30)) >> 31), result_shift);
- }
-
- // Bound the result
- int16_t out_s16 = static_cast<int16_t>(std::max<int32_t>(-32768, std::min<int32_t>(32767, in_value)));
-
- if(is_bounded_relu)
- {
- out_s16 = static_cast<int16_t>(std::max(min_s16, std::min(max_s16, out_s16)));
- }
-
- return out_s16;
-}
-
-/** Dequantize a neon vector holding 8 16-bit quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] scale Quantization scale
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale)
-{
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x2_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv))), vscale),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv))), vscale)
- }
- };
- return vdequantized_input;
-}
-
-/** Quantize a neon vector holding 8 floating point values.
- *
- * @param[in] qv Input values to be quantized.
- * @param[in] scale Quantization scale
- *
- * @return A neon vector holding the quantized values
- */
-inline int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale)
-{
- const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
-
- const int32x4x2_t rf =
- {
- {
-#ifdef __aarch64__
- vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
- vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
-#else //__aarch64__
- vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
- vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
-#endif //__aarch64__
- }
- };
- return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));
-}
-
-/** Dequantize a neon vector holding 16 16-bit quantized values.
- *
- * @param[in] qv Input values to be dequantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return Dequantized values in a neon vector
- */
-inline float32x4x4_t vdequantize(const int16x8x2_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- const float32x4_t vscale = vdupq_n_f32(scale);
- const float32x4x4_t vdequantized_input =
- {
- {
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[0]))), vscale),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[0]))), vscale),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[1]))), vscale),
- vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[1]))), vscale),
- }
- };
- return vdequantized_input;
-}
-
-/** Quantize a neon vector holding 16 floating point values.
- *
- * @param[in] qv Input values to be quantized.
- * @param[in] qi Quantization information to be used in the computation.
- *
- * @return A neon vector holding the quantized values
- */
-inline qsymm16x8x2_t vquantize_qsymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
-{
- const float scale = qi.scale;
- ARM_COMPUTE_ERROR_ON(scale == 0.f);
- const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
- const int32x4x4_t rf =
- {
- {
-#ifdef __aarch64__
- vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
- vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale)),
- vcvtnq_s32_f32(vmulq_f32(qv.val[2], vinvscale)),
- vcvtnq_s32_f32(vmulq_f32(qv.val[3], vinvscale)),
-#else //__aarch64__
- vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
- vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale)),
- vcvtq_s32_f32(vmulq_f32(qv.val[2], vinvscale)),
- vcvtq_s32_f32(vmulq_f32(qv.val[3], vinvscale)),
-#endif //__aarch64__
- }
- };
- const qsymm16x8x2_t res =
- {
- vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])),
- vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])),
- };
-
- return res;
-}
-
-/** Multiply a neon vector using quantized multiplier and shift
- *
- * @param[in] input Input vector of values to be multiplied
- * @param[in] qmul  Quantized multiplier
- * @param[in] shift Left bit shift; negative values shift right
- *
- * @return A neon vector holding the multiplied value
- */
-inline int32x4x2_t multiply_by_quantized_multiplier_2row(int32x4x2_t input, int32_t qmul, int32_t shift)
-{
- const auto left_shift = shift > 0 ? shift : 0;
- const auto right_shift = shift > 0 ? 0 : -shift;
- const auto one_shifted = 1 << left_shift;
-
- int32x4x2_t result;
- result.val[0] = rounding_divide_by_pow2(vqrdmulhq_n_s32(vmulq_n_s32(input.val[0], one_shifted), qmul), right_shift);
- result.val[1] = rounding_divide_by_pow2(vqrdmulhq_n_s32(vmulq_n_s32(input.val[1], one_shifted), qmul), right_shift);
-
- return result;
-}
-
-} // namespace arm_compute
-#endif // ARM_COMPUTE_NESYMM_H
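
Per element, symmetric 16-bit quantization reduces to q = round(x / scale) clamped to the int16 range, and dequantization to q * scale. A scalar sketch of the operation that the removed vquantize_int16 / vdequantize_int16 vectorize; scale is assumed non-zero, as the code above asserts:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Per-element model of qsymm16 (de)quantization.
    int16_t quantize_qsymm16_scalar(float x, float scale)
    {
        const int32_t q = static_cast<int32_t>(std::lround(x / scale));
        return static_cast<int16_t>(std::max(-32768, std::min(32767, q)));
    }

    float dequantize_qsymm16_scalar(int16_t q, float scale)
    {
        return static_cast<float>(q) * scale;
    }
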
diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h
deleted file mode 100644
index 7d35e40284..0000000000
--- a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H
-#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the absolute difference kernel
- *
- * Absolute difference is computed by:
- * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
- */
-class NEAbsoluteDifferenceKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEAbsoluteDifferenceKernel";
- }
- /** Default constructor */
- NEAbsoluteDifferenceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default;
- /** Default destructor */
- ~NEAbsoluteDifferenceKernel() = default;
-
- /** Set the inputs and output tensors
- *
- * @param[in] input1 Source tensor. Data types supported: U8/S16
- * @param[in] input2 Source tensor. Data types supported: U8/S16
- * @param[out] output Destination tensor. Data types supported: U8 (only if both inputs are U8), S16
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised absolute difference functions
- *
- * @param[in] input1 An input tensor. Data types supported: U8/S16.
- * @param[in] input2 An input tensor. Data types supported: U8/S16.
- * @param[out] output The output tensor. Data types supported: U8 (only if both inputs are U8), S16.
- * @param[in] window Region on which to execute the kernel.
- */
- using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
-
- /** Absolute difference function to use for the particular tensor formats passed to configure() */
- AbsDiffFunction *_func;
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */
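
The per-pixel operation behind the interface above is output(x,y) = |input1(x,y) - input2(x,y)|. A scalar sketch for the U8 case:

    #include <cstdint>
    #include <cstdlib>

    // Per-pixel absolute difference for U8 inputs; |a - b| always fits in
    // U8 when both inputs are U8, so no extra saturation is needed here.
    uint8_t absdiff_u8(uint8_t a, uint8_t b)
    {
        return static_cast<uint8_t>(std::abs(static_cast<int32_t>(a) - static_cast<int32_t>(b)));
    }
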
diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
deleted file mode 100644
index 367385dd7a..0000000000
--- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H
-#define ARM_COMPUTE_NEACCUMULATEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the accumulate kernel
- *
- * Accumulation is computed by:
- * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
- */
-class NEAccumulateKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateKernel";
- }
- /** Set the input and accumulation tensors
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in,out] accum Accumulated tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, ITensor *accum);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-
-/** Interface for the accumulate weighted kernel
- *
- * Weighted accumulation is computed:
- * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
- *
- * Where @f$ 0 \le \alpha \le 1 @f$
- * Conceptually, the rounding for this is defined as:
- * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
-*/
-class NEAccumulateWeightedKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateWeightedKernel";
- }
- /** Default constructor */
- NEAccumulateWeightedKernel();
- /** Set the input and accumulation tensors, and the scale value
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] alpha Scalar value in the range [0.0f, 1.0f]
- * @param[in,out] accum Accumulated tensor. Data type supported: U8.
- */
- void configure(const ITensor *input, float alpha, ITensor *accum);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-protected:
- float _alpha;
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** Interface for the accumulate weighted kernel using F16 */
-class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateWeightedFP16Kernel";
- }
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** Interface for the accumulate weighted kernel using F16 */
-using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-/** Interface for the accumulate squared kernel
- *
- * The accumulation of squares is computed:
- * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
- *
- * Where @f$ 0 \le shift \le 15 @f$
-*/
-class NEAccumulateSquaredKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateSquaredKernel";
- }
- /** Default constructor */
- NEAccumulateSquaredKernel();
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]
- * @param[in,out] accum Accumulated tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, uint32_t shift, ITensor *accum);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- uint32_t _shift;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */
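
The weighted variant above blends accum = (1 - alpha) * accum + alpha * input in float and stores the result back to U8. A per-pixel scalar sketch, assuming alpha in [0, 1]:

    #include <cstdint>

    // Per-pixel model of the weighted accumulation documented above.
    uint8_t accumulate_weighted_scalar(uint8_t accum, uint8_t input, float alpha)
    {
        const float res = (1.f - alpha) * static_cast<float>(accum)
                          + alpha * static_cast<float>(input);
        return static_cast<uint8_t>(res); // truncation back to U8, per the docs
    }
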
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
deleted file mode 100644
index 82103b988b..0000000000
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_fp16.h>
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the activation layer kernel. */
-class NEActivationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEActivationLayerKernel";
- }
- /** Constructor */
- NEActivationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel(const NEActivationLayerKernel &) = delete;
- /** Default move constructor */
- NEActivationLayerKernel(NEActivationLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete;
- /** Default move assignment operator */
- NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] activation_info Activation layer information.
- */
- void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using ActivationFunction = ActivationLayerInfo::ActivationFunction;
- /** Common signature for all the specialised @ref NEActivationLayerKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
- activation(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type activation(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type activation(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type activation(const Window &window);
-
-private:
- ITensor *_input;
- ITensor *_output;
- ActivationFunctionExecutorPtr _func;
- ActivationLayerInfo _act_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */
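The in-place path called out in the configure() notes above was exercised by passing nullptr as the output. A sketch under the same pre-removal headers (shape and activation parameters are illustrative):

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void bounded_relu_in_place()
    {
        Tensor t{};
        t.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));
        t.allocator()->allocate();

        const ActivationLayerInfo act(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

        // validate() rejects an unsupported configuration before any work is done
        const Status st = NEActivationLayerKernel::validate(t.info(), nullptr, act);
        if(st.error_code() != ErrorCode::OK)
        {
            return;
        }

        NEActivationLayerKernel kernel{};
        kernel.configure(&t, nullptr, act); // nullptr output -> result written back into t
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }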
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
deleted file mode 100644
index 36d257b886..0000000000
--- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H
-#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform addition between two tensors */
-class NEArithmeticAdditionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEArithmeticAdditionKernel";
- }
- /** Default constructor */
- NEArithmeticAdditionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default;
- /** Default destructor */
- ~NEArithmeticAdditionKernel() = default;
-
- /** Initialise the kernel's input, output and border mode.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] policy Overflow policy.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] policy Overflow policy.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised add functions
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] policy Overflow policy.
- * @param[in] window Region on which to execute the kernel.
- */
- using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window);
- /** Add function to use for the particular tensor types passed to configure() */
- AddFunction *_func;
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
- ConvertPolicy _policy;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */
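Of the valid configurations listed above, the widening (U8,U8) -> S16 case is the easiest to get wrong, since the output type must be declared up front. A sketch with illustrative shapes:

    #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void add_u8_u8_to_s16()
    {
        Tensor a{};
        Tensor b{};
        Tensor sum{};
        const TensorShape shape(64U, 64U);
        a.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        b.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        sum.allocator()->init(TensorInfo(shape, 1, DataType::S16)); // widening output
        a.allocator()->allocate();
        b.allocator()->allocate();
        sum.allocator()->allocate();

        NEArithmeticAdditionKernel kernel{};
        kernel.configure(&a, &b, &sum, ConvertPolicy::WRAP);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }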
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
deleted file mode 100644
index f75c6bfb98..0000000000
--- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H
-#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform subtraction between two tensors */
-class NEArithmeticSubtractionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEArithmeticSubtractionKernel";
- }
- /** Default constructor */
- NEArithmeticSubtractionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default;
- /** Default destructor */
- ~NEArithmeticSubtractionKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (QASYMM8, QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16, QSYMM16) -> QSYMM16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
- * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel
- *
- * @note Convert policy cannot be WRAP if datatype is quantized
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
- * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised sub functions
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
- * @param[in] window Region on which to execute the kernel.
- * @param[in] is_sat Flag to indicate if the policy is SATURATE.
- */
- using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat);
- /** Sub function to use for the particular tensor types passed to configure() */
- SubFunction *_func;
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
- ConvertPolicy _policy;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */
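Because WRAP is rejected for quantized data, the quantized path pairs QASYMM8 tensors with ConvertPolicy::SATURATE. A sketch with illustrative quantization parameters:

    #include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void subtract_qasymm8()
    {
        const TensorInfo qinfo(TensorShape(32U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 10));
        Tensor a{};
        Tensor b{};
        Tensor diff{};
        a.allocator()->init(qinfo);
        b.allocator()->init(qinfo);
        diff.allocator()->init(qinfo);
        a.allocator()->allocate();
        b.allocator()->allocate();
        diff.allocator()->allocate();

        NEArithmeticSubtractionKernel kernel{};
        kernel.configure(&a, &b, &diff, ConvertPolicy::SATURATE); // WRAP would fail validate() for quantized types
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }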
diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h
deleted file mode 100644
index f943744ba0..0000000000
--- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the batch concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEBatchConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBatchConcatenateLayerKernel";
- }
- /** Default constructor */
- NEBatchConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEBatchConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] batch_offset The offset on axis 3.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note The output tensor's two lowest dimensions can't be smaller than the input's.
- * @note The gaps between the two lowest dimensions of input and output must be divisible by 2.
- *
- */
- void configure(const ITensor *input, unsigned int batch_offset, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis 3.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window);
-
-private:
- BatchConcatFunction *_func;
- const ITensor *_input;
- ITensor *_output;
- unsigned int _batch_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */
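Each kernel instance copies a single input into the output at a given batch offset, so concatenating N tensors means configuring N instances against the same output. A sketch for two inputs (shapes illustrative; the offsets follow the batch_offset parameter documented above):

    #include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void concat_two_inputs_on_batch()
    {
        Tensor in0{};
        Tensor in1{};
        Tensor out{};
        in0.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U, 2U), 1, DataType::F32));
        in1.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U, 3U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U, 5U), 1, DataType::F32));
        in0.allocator()->allocate();
        in1.allocator()->allocate();
        out.allocator()->allocate();

        NEBatchConcatenateLayerKernel k0{};
        NEBatchConcatenateLayerKernel k1{};
        k0.configure(&in0, 0U, &out); // fills batches [0, 2)
        k1.configure(&in1, 2U, &out); // fills batches [2, 5)
        NEScheduler::get().schedule(&k0, Window::DimY);
        NEScheduler::get().schedule(&k1, Window::DimY);
    }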
diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
deleted file mode 100644
index d59ed7baf0..0000000000
--- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the batch normalization layer kernel.
- */
-class NEBatchNormalizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBatchNormalizationLayerKernel";
- }
- /** Default constructor */
- NEBatchNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete;
- /** Default move constructor */
- NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEBatchNormalizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- */
- void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f,
- ActivationLayerInfo act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Configure execution function in case of non-fused activation **/
- void configure_non_fused();
- /** Configure execution function in case of fused activation **/
- void configure_fused();
-
- /** Template function to run batch normalization on fp16 on tensors with NCHW format
- *
- * @tparam fused_activation Boolean that flags whether the activation is fused or not
- * @tparam F Activation function functor to run
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation, typename F>
- void batch_normalization_fp16_nchw(const Window &window);
- /** Template function to run batch normalization on fp16 on tensors with NHWC format
- *
- * @tparam fused_activation Boolean that flags whether the activation is fused or not
- * @tparam F Activation function functor to run
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation, typename F>
- void batch_normalization_fp16_nhwc(const Window &window);
- /** Template function to run batch normalization on fp32 on tensors with NCHW format
- *
- * @tparam fused_activation Boolean that flags whether the activation is fused or not
- * @tparam F Activation function functor to run
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation, typename F>
- void batch_normalization_fp32_nchw(const Window &window);
- /** Template function to run batch normalization on fp32 on tensors with NHWC format
- *
- * @tparam fused_activation Boolean that flags whether the activation is fused or not
- * @tparam F Activation function functor to run
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation, typename F>
- void batch_normalization_fp32_nhwc(const Window &window);
- /** Common signature for all the batch normalization functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window);
-
-private:
- BatchNormFunctionPtr _func;
- ITensor *_input;
- ITensor *_output;
- const ITensor *_mean;
- const ITensor *_var;
- const ITensor *_gamma;
- const ITensor *_beta;
- float _epsilon;
- ActivationLayerInfo _act_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */
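Pulling the parameters above together: mean and var are mandatory per-feature-map vectors, beta/gamma may be left nullptr for their 0 and 1 defaults, and a fused activation rides along in act_info. A sketch of the in-place F32 case (shapes illustrative):

    #include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void batch_norm_in_place_fused_relu()
    {
        Tensor src{};
        Tensor mean{};
        Tensor var{};
        src.allocator()->init(TensorInfo(TensorShape(28U, 28U, 64U), 1, DataType::F32));
        mean.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32)); // one value per feature map
        var.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
        src.allocator()->allocate();
        mean.allocator()->allocate();
        var.allocator()->allocate();

        NEBatchNormalizationLayerKernel kernel{};
        // nullptr output -> in-place; nullptr beta/gamma -> defaults 0 and 1
        kernel.configure(&src, nullptr, &mean, &var, nullptr, nullptr, 0.001f,
                         ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }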
diff --git a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h
deleted file mode 100644
index 61e47b0ea4..0000000000
--- a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the batch to space kernel */
-class NEBatchToSpaceLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBatchToSpaceLayerKernel";
- }
- /** Default constructor */
- NEBatchToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~NEBatchToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ITensor *block_shape, ITensor *output);
- /** Initialise the kernel's inputs and output (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- const ITensor *_block_shape; /**< Block shape tensor */
- ITensor *_output; /**< Destination tensor */
- DataLayout _data_layout; /**< Data layout to be used at run-time */
-
- int32_t _block_shape_x;
- int32_t _block_shape_y;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */
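With the static-block-shape overload, the output shape must already encode the fold: width and height are multiplied by the block shape while the batch count is divided by block_shape_x * block_shape_y. An illustrative sketch:

    #include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void batch_to_space_2x2()
    {
        Tensor in{};
        Tensor out{};
        in.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U, 4U), 1, DataType::F32));    // 4 batches of 8x8x3
        out.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U, 1U), 1, DataType::F32)); // folded into 1 batch
        in.allocator()->allocate();
        out.allocator()->allocate();

        NEBatchToSpaceLayerKernel kernel{};
        kernel.configure(&in, 2, 2, &out); // block_shape_x = block_shape_y = 2
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }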
diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h
deleted file mode 100644
index 7a777678dc..0000000000
--- a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H
-#define ARM_COMPUTE_NEBITWISEANDKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f]
- */
-class NEBitwiseAndKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseAndKernel";
- }
- /** Default constructor */
- NEBitwiseAndKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input1 An input tensor. Data type supported: U8.
- * @param[in] input2 An input tensor. Data type supported: U8.
- * @param[out] output Output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input1; /**< Source tensor 1 */
- const ITensor *_input2; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */
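All four bitwise kernels removed by this patch (AND here, plus the NOT/OR/XOR variants below) share the same U8-only configure-then-schedule shape, so one sketch covers the family:

    #include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void bitwise_and_planes()
    {
        const TensorInfo info(TensorShape(320U, 240U), 1, DataType::U8);
        Tensor a{};
        Tensor b{};
        Tensor out{};
        a.allocator()->init(info);
        b.allocator()->init(info);
        out.allocator()->init(info);
        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();

        NEBitwiseAndKernel kernel{};
        kernel.configure(&a, &b, &out); // output(x,y) = a(x,y) AND b(x,y)
        NEScheduler::get().schedule(&kernel, Window::DimY);
        // NEBitwiseOrKernel and NEBitwiseXorKernel take the same three arguments;
        // NEBitwiseNotKernel drops the second input.
    }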
diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h
deleted file mode 100644
index 3fb8c083a8..0000000000
--- a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H
-#define ARM_COMPUTE_NEBITWISENOTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise NOT operation
- *
- * Result is computed by:
- * @f[ output(x,y) = \lnot input(x,y) @f]
- */
-class NEBitwiseNotKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseNotKernel";
- }
- /** Default constructor */
- NEBitwiseNotKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default;
- /** Initialise the kernel's input and output
- *
- * @param[in] input An input tensor. Data type supported: U8.
- * @param[out] output The output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h
deleted file mode 100644
index 5b532510ad..0000000000
--- a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H
-#define ARM_COMPUTE_NEBITWISEORKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise inclusive OR between two tensors
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f]
- */
-class NEBitwiseOrKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseOrKernel";
- }
- /** Default constructor */
- NEBitwiseOrKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input1 An input tensor. Data type supported: U8.
- * @param[in] input2 An input tensor. Data type supported: U8.
- * @param[out] output Output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input1; /**< Source tensor 1 */
- const ITensor *_input2; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h
deleted file mode 100644
index 0d9120501b..0000000000
--- a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H
-#define ARM_COMPUTE_NEBITWISEXORKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f]
- */
-class NEBitwiseXorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseXorKernel";
- }
- /** Default constructor */
- NEBitwiseXorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input1 An input tensor. Data type supported: U8.
- * @param[in] input2 An input tensor. Data type supported: U8.
- * @param[out] output The output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input1; /**< Source tensor 1 */
- const ITensor *_input2; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h b/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h
deleted file mode 100644
index e94f228f2a..0000000000
--- a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H
-#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the bounding box kernel */
-class NEBoundingBoxTransformKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBoundingBoxTransformKernel";
- }
-
- /** Default constructor */
- NEBoundingBoxTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default;
- /** Default destructor */
- ~NEBoundingBoxTransformKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes.
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single image prediction is supported. The image height, width and scale are provided through the BoundingBoxTransformInfo struct.
- *
- */
- void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransformKernel
- *
- * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes.
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single image prediction is supported. The image height, width and scale are provided through the BoundingBoxTransformInfo struct.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- void internal_run(const Window &window);
-
- const ITensor *_boxes;
- ITensor *_pred_boxes;
- const ITensor *_deltas;
- BoundingBoxTransformInfo _bbinfo;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */
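The Size(M, 4) and Size(M, 4*K) notations above map to 2-D tensor shapes whose lowest dimension holds the coordinates. A sketch with illustrative M, K and image dimensions; the three BoundingBoxTransformInfo arguments used here are assumed to be image width, height and scale:

    #include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void transform_proposals()
    {
        constexpr unsigned int M = 128U; // number of proposals
        constexpr unsigned int K = 4U;   // number of classes
        Tensor boxes{};
        Tensor deltas{};
        Tensor pred_boxes{};
        boxes.allocator()->init(TensorInfo(TensorShape(4U, M), 1, DataType::F32));
        deltas.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));
        pred_boxes.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));
        boxes.allocator()->allocate();
        deltas.allocator()->allocate();
        pred_boxes.allocator()->allocate();

        NEBoundingBoxTransformKernel kernel{};
        kernel.configure(&boxes, &pred_boxes, &deltas, BoundingBoxTransformInfo(800.f, 600.f, 1.f));
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }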
diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
deleted file mode 100644
index 448e33be3c..0000000000
--- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H
-#define ARM_COMPUTE_NEBOX3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a Box 3x3 filter */
-class NEBox3x3Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEBox3x3Kernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform a Box 3x3 filter for FP16 datatype
- */
-class NEBox3x3FP16Kernel : public NEBox3x3Kernel
-{
-public:
- const char *name() const override
- {
- return "NEBox3x3FP16Kernel";
- }
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */
-using NEBox3x3FP16Kernel = NEBox3x3Kernel;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */
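At kernel level the caller owns border handling: border_undefined only tells the kernel whether it may skip the border region. A sketch of the plain U8 filter, assuming the border has been filled beforehand (the runtime functions normally pair this kernel with a fill-border kernel):

    #include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void box_filter_3x3()
    {
        Tensor src{};
        Tensor dst{};
        src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        NEBox3x3Kernel kernel{};
        kernel.configure(&src, &dst, false /* border is replicate/constant, not undefined */);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }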
diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
deleted file mode 100644
index 1979c5bd2b..0000000000
--- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H
-#define ARM_COMPUTE_NECANNYEDGEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Computes magnitude and quantised phase from inputs gradients. */
-class NEGradientKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGradientKernel";
- }
- /** Default constructor */
- NEGradientKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGradientKernel(const NEGradientKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGradientKernel &operator=(const NEGradientKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGradientKernel(NEGradientKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGradientKernel &operator=(NEGradientKernel &&) = default;
- /** Default destructor */
- virtual ~NEGradientKernel() = default;
-
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @note gx, gy and magnitude must all use the same bit depth (either 16-bit or 32-bit)
- *
- * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32).
- * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8.
- * @param[in] norm_type Normalization type. If 1, L1-Norm; otherwise, L2-Norm
- */
- virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-protected:
- /** Common signature for all the specialised gradient functions
- *
- * @param[in] gx_ptr Pointer to the first input tensor.
- * @param[in] gy_ptr Pointer to the second input tensor.
- * @param[out] magnitude_ptr Pointer to the first output tensor
- * @param[out] phase_ptr Pointer to the second output tensor
- */
- using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr);
-
- GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */
- const ITensor *_gx; /**< Source tensor - Gx component */
- const ITensor *_gy; /**< Source tensor - Gy component */
- ITensor *_magnitude; /**< Destination tensor - Magnitude */
- ITensor *_phase; /**< Destination tensor - Quantized phase */
-};
-
-/** NEON kernel to perform Non-Maxima suppression for Canny Edge.
- *
- * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
- * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE.
- *
- * @note Hysteresis is computed in @ref NEEdgeTraceKernel
- */
-class NEEdgeNonMaxSuppressionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEEdgeNonMaxSuppressionKernel";
- }
- /** Default constructor */
- NEEdgeNonMaxSuppressionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default;
- /** Default destructor */
- ~NEEdgeNonMaxSuppressionKernel() = default;
-
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data type supported: U8.
- * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge"
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Common signature for all the specialised non-maxima suppression functions
- *
- * @param[in] magnitude_ptr Pointer to the first input tensor.
- * @param[in] phase_ptr Pointer to the second input tensor.
- * @param[out] output_ptr Pointer to the output tensor
- * @param[in] stride_mag Stride of the magnitude tensor
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- */
- using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr,
- const int32_t lower_thr);
-
- EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
- const ITensor *_magnitude; /**< Source tensor - Magnitude */
- const ITensor *_phase; /**< Source tensor - Quantized phase */
- ITensor *_output; /**< Destination tensor */
- int32_t _lower_thr; /**< Lower threshold used for the hysteresis */
- int32_t _upper_thr; /**< Upper threshold used for the hysteresis */
-};
-
-/** NEON kernel to perform Edge tracing */
-class NEEdgeTraceKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEEdgeTraceKernel";
- }
- /** Default constructor */
- NEEdgeTraceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default;
- /** Default destructor */
- ~NEEdgeTraceKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge"
- * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge).
- */
- void configure(ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
- bool is_parallelisable() const override;
-
-private:
- ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */
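
For reference, the gradient stage above reduces to simple per-pixel math. The following standalone sketch mirrors the configure() contract for S16 inputs; the four-sector phase binning is an assumption about how the non-maxima suppression stage consumes the quantised phase, not a copy of the removed NEON code.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Magnitude per the configure() contract: L1 (|gx| + |gy|) when norm_type == 1,
// otherwise L2 (sqrt(gx^2 + gy^2)), widened to U16 for S16 inputs.
static uint16_t magnitude(int16_t gx, int16_t gy, int32_t norm_type)
{
    const int ax = gx < 0 ? -gx : gx;
    const int ay = gy < 0 ? -gy : gy;
    if(norm_type == 1)
    {
        return static_cast<uint16_t>(ax + ay);
    }
    return static_cast<uint16_t>(std::sqrt(static_cast<float>(gx) * gx + static_cast<float>(gy) * gy));
}

// Phase quantised into four sectors (0, 45, 90, 135 degrees); the binning is
// an assumption, the removed kernel stores the quantised phase as U8.
static uint8_t quantized_phase(int16_t gx, int16_t gy)
{
    float angle = std::atan2(static_cast<float>(gy), static_cast<float>(gx)) * 180.0f / 3.14159265f;
    if(angle < 0.0f)
    {
        angle += 180.0f; // fold to [0, 180)
    }
    if(angle < 22.5f || angle >= 157.5f) return 0; // horizontal
    if(angle < 67.5f)                    return 1; // 45 degrees
    if(angle < 112.5f)                   return 2; // vertical
    return 3;                                      // 135 degrees
}

int main()
{
    printf("mag=%u phase=%u\n", magnitude(3, -4, 2), quantized_phase(3, -4)); // mag=5 phase=3
    return 0;
}
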
diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h
deleted file mode 100644
index 8f019384d9..0000000000
--- a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H
-#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <array>
-#include <cstdint>
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the channel combine kernel */
-class NEChannelCombineKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEChannelCombineKernel";
- }
- /** Default constructor */
- NEChannelCombineKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelCombineKernel(const NEChannelCombineKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEChannelCombineKernel(NEChannelCombineKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default;
- /** Default destructor */
- ~NEChannelCombineKernel() = default;
-
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8
- * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- */
- void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444
- */
- void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- /** Combine 3 planes to form a three channel single plane tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_3C(const Window &win);
- /** Combine 4 planes to form a four channel single plane tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_4C(const Window &win);
- /** Combine 3 planes to form a single plane YUV tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- template <bool is_yuyv>
- void combine_YUV_1p(const Window &win);
- /** Combine 3 planes to form a two plane YUV tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_YUV_2p(const Window &win);
- /** Combine 3 planes to form a three plane YUV tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_YUV_3p(const Window &win);
- /** Copies a full plane to the output tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void copy_plane(const Window &win, uint32_t plane_id);
- /** Common signature for all the specialised ChannelCombine functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window);
- /** ChannelCombine function to use for the particular tensor types passed to configure() */
- ChannelCombineFunction _func;
- std::array<const ITensor *, 4> _planes;
- ITensor *_output;
- IMultiImage *_output_multi;
- std::array<uint32_t, 3> _x_subsampling;
- std::array<uint32_t, 3> _y_subsampling;
- unsigned int _num_elems_processed_per_iteration;
- bool _is_parallelizable;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */
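
The single-plane RGB888 path of the kernel above is a plane interleave. A minimal scalar sketch of what combine_3C() produces (illustrative only; the removed implementation vectorises this per window row):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Interleave three U8 planes into one packed RGB888 plane.
static void combine_rgb888(const uint8_t *r, const uint8_t *g, const uint8_t *b,
                           uint8_t *dst, size_t num_pixels)
{
    for(size_t i = 0; i < num_pixels; ++i)
    {
        dst[3 * i + 0] = r[i];
        dst[3 * i + 1] = g[i];
        dst[3 * i + 2] = b[i];
    }
}

int main()
{
    const uint8_t r[2] = { 10, 20 }, g[2] = { 30, 40 }, b[2] = { 50, 60 };
    uint8_t       out[6] = {};
    combine_rgb888(r, g, b, out, 2);
    for(uint8_t v : out)
    {
        printf("%u ", v); // 10 30 50 20 40 60
    }
    printf("\n");
    return 0;
}
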
diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h
deleted file mode 100644
index 8d62016fe5..0000000000
--- a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H
-#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the channel extract kernel */
-class NEChannelExtractKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEChannelExtractKernel";
- }
- /** Default constructor */
- NEChannelExtractKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelExtractKernel(const NEChannelExtractKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEChannelExtractKernel(NEChannelExtractKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default;
- /** Default destructor */
- ~NEChannelExtractKernel() = default;
-
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output  Destination tensor. Format supported: U8
- */
- void configure(const ITensor *input, Channel channel, ITensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar destination image. Format supported: U8
- */
- void configure(const IMultiImage *input, Channel channel, IImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Extract one channel from a two channel planar tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_1C_from_2C_img(const Window &win);
- /** Extract one channel from a three channel planar tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_1C_from_3C_img(const Window &win);
- /** Extract one channel from a four channel planar tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_1C_from_4C_img(const Window &win);
- /** Extract U/V channel from a single planar YUYV/UYVY tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_YUYV_uv(const Window &win);
- /** Copies a full plane to the output tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void copy_plane(const Window &win);
- /** Common signature for all the specialised ChannelExtract functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window);
- /** ChannelExtract function to use for the particular tensor types passed to configure() */
- ChannelExtractFunction _func;
- unsigned int _lut_index;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */
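
Conversely, extracting one channel from a packed format is a strided gather. A minimal sketch, assuming an interleaved layout such as RGB888 (channels = 3):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Copy one channel out of an interleaved plane: dst[i] = src[i * channels + channel].
static void extract_channel(const uint8_t *src, uint8_t *dst, size_t num_pixels,
                            unsigned int channels, unsigned int channel)
{
    for(size_t i = 0; i < num_pixels; ++i)
    {
        dst[i] = src[i * channels + channel];
    }
}

int main()
{
    const uint8_t rgb[6]  = { 1, 2, 3, 4, 5, 6 }; // two packed RGB888 pixels
    uint8_t       blue[2] = {};
    extract_channel(rgb, blue, 2, 3, 2);
    printf("%u %u\n", blue[0], blue[1]); // 3 6
    return 0;
}
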
diff --git a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h
deleted file mode 100644
index 71659c4fcb..0000000000
--- a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H
-#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the channel shuffle kernel */
-class NEChannelShuffleLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEChannelShuffleLayerKernel";
- }
- /** Default constructor */
- NEChannelShuffleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default;
- /** Default destructor */
- ~NEChannelShuffleLayerKernel() = default;
- /** Configure function's inputs and outputs.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int num_groups);
- /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel
- *
- * @param[in] input      Input tensor info. Data types supported: All
- * @param[in] output     Output tensor info. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _num_groups;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */
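
The shuffle itself is a fixed permutation of the channel dimension: view the C channels as a G x (C/G) matrix, transpose it, and flatten. A hedged sketch of the index mapping, with K = C / G (illustrative, not the removed NEON code):

#include <cstdio>
#include <vector>

// With C channels and G groups, output channel o reads input channel (o % G) * K + o / G.
static std::vector<int> shuffle_indices(int channels, int groups)
{
    const int k = channels / groups; // channels per group; C must be a multiple of G
    std::vector<int> src(channels);
    for(int o = 0; o < channels; ++o)
    {
        src[o] = (o % groups) * k + o / groups;
    }
    return src;
}

int main()
{
    for(int s : shuffle_indices(6, 2))
    {
        printf("%d ", s); // 0 3 1 4 2 5 -- the two groups interleaved
    }
    printf("\n");
    return 0;
}
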
diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h
deleted file mode 100644
index 9aa1062622..0000000000
--- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H
-#define ARM_COMPUTE_NECOL2IMKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform col2im reshaping.
- *
- * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel.
- *
- * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
- *
- * @f[
- * \left( \begin{array}{ccccccccc}
- * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccc}
- * a0 & a1 & a2 \\
- * a3 & a4 & a5 \\
- * a6 & a7 & a8 \\
- * \end{array} \right)
- * @f]
- */
-class NECol2ImKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECol2ImKernel";
- }
- /** Default constructor */
- NECol2ImKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECol2ImKernel(const NECol2ImKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECol2ImKernel &operator=(const NECol2ImKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECol2ImKernel(NECol2ImKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECol2ImKernel &operator=(NECol2ImKernel &&) = default;
- /** Default destructor */
- ~NECol2ImKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
- /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the col2im
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_col2im(const Window &window);
-
- /** Common signature for all the specialised col2im functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window);
-
- Col2ImFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- Size2D _convolved_dims;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECOL2IMKERNEL_H */
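
A scalar sketch of the reshape in the example above, assuming each matrix column of w*h elements is stored contiguously and becomes one w x h output map; the removed kernel's actual memory layout and batching may differ:

#include <cstdio>

// Each column of w*h elements is written back as one w x h output map.
static void col2im(const float *col, float *img, int w, int h, int num_maps)
{
    for(int m = 0; m < num_maps; ++m)
    {
        for(int y = 0; y < h; ++y)
        {
            for(int x = 0; x < w; ++x)
            {
                img[(m * h + y) * w + x] = col[m * w * h + y * w + x];
            }
        }
    }
}

int main()
{
    const float col[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
    float       img[9];
    col2im(col, img, 3, 3, 1);
    printf("%g %g %g\n", img[3], img[4], img[5]); // second row of the 3x3 block: 3 4 5
    return 0;
}
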
diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h
deleted file mode 100644
index 3059288ab4..0000000000
--- a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOLORCONVERTKERNEL_H
-#define ARM_COMPUTE_NECOLORCONVERTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the color convert kernel */
-class NEColorConvertKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEColorConvertKernel";
- }
- /** Default constructor */
- NEColorConvertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEColorConvertKernel(const NEColorConvertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEColorConvertKernel(NEColorConvertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default;
- /** Default destructor */
- ~NEColorConvertKernel() = default;
-
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- *                    RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
- *                    U8 (if the format of @p input is RGB888)
- */
- void configure(const ITensor *input, ITensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const IMultiImage *input, IImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
- */
- void configure(const IImage *input, IMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const IMultiImage *input, IMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win);
- const void *_input;
- void *_output;
- ColorConvertFunction *_func;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECOLORCONVERTKERNEL_H */
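
As one concrete instance of the first overload above, the RGB888 -> U8 path is a luma computation. The BT.601 weights and rounding below are assumptions, not a quote of the removed implementation:

#include <cstdint>
#include <cstdio>

// BT.601 luma: Y = 0.299 R + 0.587 G + 0.114 B (assumed coefficients).
static uint8_t rgb888_to_u8(uint8_t r, uint8_t g, uint8_t b)
{
    const float y = 0.299f * r + 0.587f * g + 0.114f * b;
    return static_cast<uint8_t>(y + 0.5f); // round to nearest
}

int main()
{
    printf("%u\n", rgb888_to_u8(255, 0, 0)); // 76
    return 0;
}
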
diff --git a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h
deleted file mode 100644
index d45191949a..0000000000
--- a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa.
- *
- * @note This function can be applied to the 2D weights used by a Fully Connected layer if:
- * - It follows a Convolution layer
- * - The data layout used by the network does not match the one the model has been trained in.
- *
- * @note This function assumes the weights are already reshaped (transposed)
- */
-class NEConvertFullyConnectedWeightsKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvertFullyConnectedWeightsKernel";
- }
- /** Default constructor */
- NEConvertFullyConnectedWeightsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default;
- /** Default destructor */
- ~NEConvertFullyConnectedWeightsKernel() = default;
- /** Set the input and output tensor.
- *
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
- * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
- * @param[in]  original_input_shape Shape of the original input tensor (the one entering the fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel
- *
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
- * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering the fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the permute
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_convert_fc_weights(const Window &window);
-
- const ITensor *_input;
- ITensor *_output;
- unsigned int _factor1; /* equals to the number of elements per original input plane if @p data_layout == NCHW; its number of channels otherwise */
- unsigned int _factor2; /* equals to the number of elements per original input plane if @p data_layout == NHWC; its number of channels otherwise */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */
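
The _factor1/_factor2 members above encode a pure index permutation on the rows of the 2D weights. A minimal sketch of the NCHW -> NHWC direction, where num_elems is the number of elements per original input plane (the helper name is hypothetical):

#include <cstdio>

// A weight row indexed by the flattened original input moves from its NCHW
// position i = c * (H*W) + y*W + x to its NHWC position (y*W + x) * C + c.
static int nchw_to_nhwc_row(int i, int num_elems, int channels)
{
    const int c       = i / num_elems; // plane (channel) index
    const int spatial = i % num_elems; // y*W + x
    return spatial * channels + c;
}

int main()
{
    // 2x2 plane, 2 channels: NCHW row 5 is (c = 1, spatial = 1) -> NHWC row 3.
    printf("%d\n", nchw_to_nhwc_row(5, 4, 2));
    return 0;
}
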
diff --git a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
deleted file mode 100644
index 6ec2793484..0000000000
--- a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H
-#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel to convert asymmetric unsigned to asymmetric signed and vice-versa */
-class NEConvertQuantizedSignednessKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvertQuantizedSignednessKernel";
- }
- /** Default constructor */
- NEConvertQuantizedSignednessKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data types supported: opposite of @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConvertQuantizedSignednessKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data types supported: opposite of @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */
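
Because QASYMM8 and QASYMM8_SIGNED differ only in the signedness of the stored 8-bit value, the per-element conversion can be written as a sign-bit flip with the zero point shifted by 128. That equivalence is an assumption based on the data types involved, not a quote of the removed kernel:

#include <cstdint>
#include <cstdio>

// Flip the top bit: q_signed = q_unsigned - 128 (mod 256), and vice-versa.
// The tensor's zero point must shift by 128 in the same direction so the
// represented real value (q - zero_point) * scale is unchanged.
static uint8_t flip_signedness(uint8_t v)
{
    return v ^ 0x80;
}

int main()
{
    const uint8_t q_u8 = 200; // QASYMM8 value with zero point 128
    const int8_t  q_s8 = static_cast<int8_t>(flip_signedness(q_u8));
    printf("%d\n", q_s8); // 72: the QASYMM8_SIGNED value with zero point 0
    return 0;
}
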
diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h
deleted file mode 100644
index 2b271de56b..0000000000
--- a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H
-#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-#include <array>
-#include <cstdint>
-#include <vector>
-
-namespace arm_compute
-{
-class ITensor;
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
- * The client can supply a convolution matrix \f$ C_{m,n} \f$.
- * @f{eqnarray}{
- * k_0 &=& \frac{m}{2} \\
- * l_0 &=& \frac{n}{2} \\
- * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
- * @f}
- *
- * @note The above equation for this function is similar to the default OpenCV Filter2D function,
- * which actually computes a correlation and not a convolution.
- * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
- */
-template <unsigned int matrix_size>
-class NEConvolutionKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvolutionKernel";
- }
- /** Default constructor */
- NEConvolutionKernel();
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- template <typename OutputType>
- void convolution(const Window &win);
-
-protected:
- uint32_t _scale; /**< scale of the convolution */
- std::array<int16_t, matrix_size * matrix_size> _convolution; /**< convolution matrix */
-};
-
-/** Interface for the kernel which applies a 3x3 convolution to a tensor.*/
-using NEConvolution3x3Kernel = NEConvolutionKernel<3>;
-/** Interface for the kernel which applies a 5x5 convolution to a tensor.*/
-using NEConvolution5x5Kernel = NEConvolutionKernel<5>;
-/** Interface for the kernel which applies a 7x7 convolution to a tensor.*/
-using NEConvolution7x7Kernel = NEConvolutionKernel<7>;
-/** Interface for the kernel which applies a 9x9 convolution to a tensor.*/
-using NEConvolution9x9Kernel = NEConvolutionKernel<9>;
-
-/****************************************************************************************\
- * Separable Square Convolution *
-\****************************************************************************************/
-
-/** Kernel for the Horizontal pass of a Separable Convolution */
-template <unsigned int matrix_size>
-class NESeparableConvolutionHorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NESeparableConvolutionHorKernel";
- }
- /** Default constructor */
- NESeparableConvolutionHorKernel();
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16, S16, S32.
- * @param[in] conv_row Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Apply the object's convolution to the given window of the input tensor.
- *
- * @param[in] window Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolve(const Window &window);
-
- std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel which applies a 5x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>;
-/** Interface for the kernel which applies a 7x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>;
-/** Interface for the kernel which applies a 9x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>;
-
-/** Kernel for the Vertical pass of a Separable Convolution */
-template <unsigned int matrix_size>
-class NESeparableConvolutionVertKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NESeparableConvolutionVertKernel";
- }
- /** Default constructor */
- NESeparableConvolutionVertKernel();
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U16, S16, S32.
- * @param[out] output           Destination tensor. Data types supported: U8, S16.
- * @param[in] conv_col Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as U16.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_u16(const Window &win);
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as S16.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_s16(const Window &win);
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as S32.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_s32(const Window &win);
-
- std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */
- uint32_t _scale; /**< Convolution's scale */
-};
-
-/** Interface for the kernel which applies a 1x5 vertical convolution to a tensor.*/
-using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>;
-/** Interface for the kernel which applies a 1x7 vertical convolution to a tensor.*/
-using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>;
-/** Interface for the kernel which applies a 1x9 vertical convolution to a tensor.*/
-using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>;
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-/** Kernel for the running convolution on a rectangle matrix.
- *
- * @note Supports combinations of 3,5,7 and 9.
- */
-class NEConvolutionRectangleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvolutionRectangleKernel";
- }
- /** Default constructor */
- NEConvolutionRectangleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangleKernel(const NEConvolutionRectangleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangleKernel &operator=(const NEConvolutionRectangleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output           Destination tensor. Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- unsigned int get_index(uint32_t val);
- /** Apply the object's convolution to the given window of the input tensor.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType, unsigned int rows, unsigned int cols>
- void convolution(const Window &win);
-
-protected:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output; /**< Output tensor */
- uint32_t _scale; /**< Scale of the convolution */
- std::vector<int16_t> _convolution; /**< Convolution matrix */
- BorderSize _border_size; /**< Calculated border width */
- uint32_t _func_idx; /**< Index used to specify convolution function to be used */
- static const unsigned int _nr_supported_sizes{ 4 }; /**< Number of supported permutations */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */
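
A scalar reference of the documented sum: a correlation of the m x n matrix centred on (x, y), divided by the scale as configure() describes (the scale == 0 substitution and border handling are omitted):

#include <cstdint>
#include <cstdio>

// sum over k, l of input(x + k - k0, y + l - l0) * C[k][l], then divide by scale.
// The caller must keep (x, y) far enough from the border for all taps.
static int32_t convolve_at(const uint8_t *src, int stride, int x, int y,
                           const int16_t *conv, int m, int n, uint32_t scale)
{
    const int k0  = m / 2;
    const int l0  = n / 2;
    int32_t   sum = 0;
    for(int k = 0; k < m; ++k)
    {
        for(int l = 0; l < n; ++l)
        {
            sum += src[(y + l - l0) * stride + (x + k - k0)] * conv[k * n + l];
        }
    }
    return sum / static_cast<int32_t>(scale);
}

int main()
{
    const uint8_t img[9] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 }; // 3x3 image of ones
    const int16_t box[9] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 }; // 3x3 box filter
    printf("%d\n", convolve_at(img, 3, 1, 1, box, 3, 3, 9)); // (9 * 1) / 9 = 1
    return 0;
}
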
diff --git a/arm_compute/core/NEON/kernels/NECopyKernel.h b/arm_compute/core/NEON/kernels/NECopyKernel.h
deleted file mode 100644
index d2dbbaef98..0000000000
--- a/arm_compute/core/NEON/kernels/NECopyKernel.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOPYKERNEL_H
-#define ARM_COMPUTE_NECOPYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a copy between two tensors */
-class NECopyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECopyKernel";
- }
- /** Default constructor */
- NECopyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NECopyKernel(const NECopyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NECopyKernel &operator=(const NECopyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECopyKernel(NECopyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECopyKernel &operator=(NECopyKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor. Data types supported: All
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] padding (Optional) Padding to be applied to the input tensor
- */
- void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList());
- /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel
- *
- * @param[in] input Source tensor. Data types supported: All
- * @param[in] output Destination tensor. Data types supported: same as @p input.
- * @param[in] padding (Optional) Padding to be applied to the input tensor
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- PaddingList _padding;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECOPYKERNEL_H */
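
A sketch of the padded-copy semantics on a 2D tensor, assuming PaddingList supplies (before, after) element counts per dimension; the output must already be sized to the input plus its padding:

#include <cstdio>

// Copy a w x h block into an output widened by left/right padding, offset by
// the "before" padding; padded cells are left untouched (zero-initialised here).
static void copy_with_padding(const float *src, float *dst, int w, int h,
                              int pad_left, int pad_top, int pad_right)
{
    const int dst_w = w + pad_left + pad_right;
    for(int y = 0; y < h; ++y)
    {
        for(int x = 0; x < w; ++x)
        {
            dst[(y + pad_top) * dst_w + (x + pad_left)] = src[y * w + x];
        }
    }
}

int main()
{
    const float in[4]   = { 1, 2, 3, 4 }; // 2x2 input
    float       out[12] = {};             // 4x3 output: pad_left/right = 1, pad_top = 1
    copy_with_padding(in, out, 2, 2, 1, 1, 1);
    printf("%g %g\n", out[5], out[6]); // 1 2: the first input row, shifted by the padding
    return 0;
}
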
diff --git a/arm_compute/core/NEON/kernels/NECropKernel.h b/arm_compute/core/NEON/kernels/NECropKernel.h
deleted file mode 100644
index ba58ab1e58..0000000000
--- a/arm_compute/core/NEON/kernels/NECropKernel.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H
-#define ARM_COMPUTE_NEON_CROP_KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor cropping */
-class NECropKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECropKernel";
- }
- /** Default constructor */
- NECropKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECropKernel(const NECropKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECropKernel &operator=(const NECropKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECropKernel(NECropKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECropKernel &operator=(NECropKernel &&) = default;
- /** Default destructor */
- ~NECropKernel() = default;
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- * @note Padding not supported.
- *
- * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
- * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values.
- * Data type supported: F32
- * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input.
- * Data type supported: F32
- * @param[out] output Destination tensor. Data type supported: F32
- * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- */
- void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NECropKernel
- *
- * @note Supported tensor rank: up to 4
- * @note Padding not supported.
- *
- * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
- * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32
- * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image
- * in @p input. Data type supported: F32
- * @param[in] output Destination tensor. Data type supported: F32
- * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
-
- /** Configure the output tensor's shape, as it can only be determined at runtime. */
- void configure_output_shape();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Function to use for in bounds crop for the particular tensor types passed to configure() */
- using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool);
-
-private:
- const ITensor *_input;
- const ITensor *_crop_boxes;
- const ITensor *_box_ind;
- ITensor *_output;
-
- Coordinates _start;
- Coordinates _end;
- uint32_t _crop_box_ind;
- float _extrapolation_value;
- /** The number of rows out of bounds at the start and end of output. */
- std::array<uint32_t, 2> _rows_out_of_bounds;
- /** The number of columns out of bounds at the start and end of output. */
- std::array<uint32_t, 2> _cols_out_of_bounds;
-
- NECropKernel::InBoundsCropFunction *_in_bounds_crop_function;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */
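For context, a minimal usage sketch of the interface above as it stood before this removal; the tensor names, scheduling split and box index are illustrative assumptions, not part of the patch:

#include "arm_compute/core/NEON/kernels/NECropKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

// Crop box 0 out of 'crop_boxes', padding out-of-image reads with 0.f.
void run_crop(Tensor &input, Tensor &crop_boxes, Tensor &box_ind, Tensor &output)
{
    NECropKernel crop;
    crop.configure(&input, &crop_boxes, &box_ind, &output, 0 /* crop_box_ind */, 0.f);
    // The output shape depends on the box values, so it is only known once the
    // crop-box tensor has been filled; resolve it just before running.
    crop.configure_output_shape();
    output.allocator()->allocate();
    NEScheduler::get().schedule(&crop, Window::DimY);
}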
diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h
deleted file mode 100644
index 52442c3920..0000000000
--- a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H
-#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class IDistribution1D;
-class ILut;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the cumulative distribution (cumulative summation) calculation kernel.
- *
- * This kernel calculates the cumulative sum of a given distribution (meaning that each output element
- * is the sum of all its previous elements including itself) and creates a lookup table with the normalized
- * pixel intensities, which is used to improve the contrast of the image.
- */
-class NECumulativeDistributionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECumulativeDistributionKernel";
- }
- /** Default constructor */
- NECumulativeDistributionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default;
- /** Set the input and output distribution.
- *
- * @param[in] input Input image. Data type supported: U8
- * @param[in] distribution Unnormalized 256-bin distribution of the input image.
- * @param[out] cumulative_sum Cumulative distribution (summed histogram). Should be the same size as @p distribution.
- * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements.
- */
- void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- const IImage *_input; /**< Input image. */
- const IDistribution1D *_distribution; /**< Input histogram of the input image. */
- IDistribution1D *_cumulative_sum; /**< The cumulative distribution. */
- ILut *_output; /**< Output with the equalization lookup table. */
-private:
- static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */
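As a plain-scalar illustration of the computation the comment above describes (not the kernel's implementation), the cumulative sum and one common way of normalising it into a U8 equalisation LUT look like this; the exact rounding ACL applies may differ:

#include <array>
#include <cstdint>

// Build an equalisation LUT from a 256-bin histogram of an image with
// 'num_pixels' pixels. Each CDF bin is the sum of all bins up to and
// including itself; the CDF is then rescaled into the U8 range.
std::array<uint8_t, 256> make_equalisation_lut(const std::array<uint32_t, 256> &hist, uint32_t num_pixels)
{
    std::array<uint32_t, 256> cdf{};
    uint32_t running = 0;
    for(size_t i = 0; i < hist.size(); ++i)
    {
        running += hist[i];
        cdf[i] = running;
    }
    std::array<uint8_t, 256> lut{};
    for(size_t i = 0; i < lut.size(); ++i)
    {
        lut[i] = static_cast<uint8_t>((static_cast<uint64_t>(cdf[i]) * 255U) / num_pixels);
    }
    return lut;
}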
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
deleted file mode 100644
index 6690ac2236..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the depth concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEDepthConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthConcatenateLayerKernel";
- }
- /** Default constructor */
- NEDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's two lowest dimensions can't be smaller than the input's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const ITensor *input, unsigned int depth_offset, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window);
-
-private:
- DepthConcatFunction *_func;
- const ITensor *_input;
- ITensor *_output;
- unsigned int _depth_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */
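A hedged sketch of how the depth_offset parameter composes a full concatenation: one kernel instance per input, each writing at an increasing Z offset into the shared output. The tensor names and the Z-axis scheduling split are illustrative:

#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void concat_two_along_depth(Tensor &a, Tensor &b, Tensor &out)
{
    NEDepthConcatenateLayerKernel k_a;
    NEDepthConcatenateLayerKernel k_b;
    k_a.configure(&a, 0, &out);                      // a fills channels [0, depth(a))
    k_b.configure(&b, a.info()->dimension(2), &out); // b starts where a ends on Z
    NEScheduler::get().schedule(&k_a, Window::DimZ);
    NEScheduler::get().schedule(&k_b, Window::DimZ);
}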
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
deleted file mode 100644
index 5cda3203ed..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H
-#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Depth conversion kernel
- * This function ignores the scale and zeroPoint of quantized tensors, i.e. QASYMM8 input is treated as uint8 values.
- */
-class NEDepthConvertLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthConvertLayerKernel";
- }
- /** Default constructor*/
- NEDepthConvertLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete;
- /** Default move constructor */
- NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete;
- /** Default move assignment operator */
- NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default;
- /** Set the input and output of the kernel
- *
- * Valid conversions Input -> Output :
- *
- * - QASYMM8_SIGNED -> S16, S32, F32, F16
- * - QASYMM8 -> U16, S16, S32, F32, F16
- * - U8 -> U16, S16, S32, F32, F16
- * - U16 -> U8, U32
- * - S16 -> QASYMM8_SIGNED, U8, S32
- * - BFLOAT16 -> F32
- * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
- * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
- * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
- *
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
- * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
- * @param[in] policy Conversion policy.
- * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
- */
- void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- ConvertPolicy _policy;
- uint32_t _shift;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H */
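Since configure() typically asserts on invalid arguments, the validate()/configure() pairing above is the usual way to probe a conversion first; a sketch for a U8 -> S16 up-conversion, with illustrative shapes:

#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

bool can_convert_u8_to_s16()
{
    const TensorInfo src(TensorShape(32U, 32U), 1, DataType::U8);
    const TensorInfo dst(TensorShape(32U, 32U), 1, DataType::S16);
    // SATURATE clamps instead of wrapping; shift must satisfy 0 <= shift < 8.
    const Status status = NEDepthConvertLayerKernel::validate(&src, &dst, ConvertPolicy::SATURATE, 0);
    return status.error_code() == ErrorCode::OK;
}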
diff --git a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h
deleted file mode 100644
index 0b645887ee..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the depth to space kernel */
-class NEDepthToSpaceLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthToSpaceLayerKernel";
- }
- /** Default constructor */
- NEDepthToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~NEDepthToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
- DataLayout _data_layout; /**< Data layout of the operation */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */
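The block_shape parameter fixes the shape contract; assuming an NCHW-ordered TensorShape [W, H, C, N] (the kernel itself follows the tensor's data layout), the expected output shape can be sketched as:

#include "arm_compute/core/TensorShape.h"
#include <cstdint>

using namespace arm_compute;

// Depth-to-space with block shape 'b': spatial dims grow by b, depth shrinks
// by b*b. Assumes in[2] is divisible by b*b, which validate() would enforce.
TensorShape depth_to_space_shape(const TensorShape &in, int32_t b)
{
    TensorShape out = in;
    out.set(0, in[0] * b);       // width
    out.set(1, in[1] * b);       // height
    out.set(2, in[2] / (b * b)); // depth
    return out;
}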
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
deleted file mode 100644
index 227ddb4743..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. */
-class NEDepthwiseConvolutionLayer3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseConvolutionLayer3x3Kernel";
- }
- /** Default constructor */
- NEDepthwiseConvolutionLayer3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayer3x3Kernel(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayer3x3Kernel &operator=(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Default Move Constructor. */
- NEDepthwiseConvolutionLayer3x3Kernel(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Default move assignment operator */
- NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @note Supported data layouts: NCHW and NHWC
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] if NHWC data layout. Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3Kernel
- *
- * @note Supported data layouts: NCHW and NHWC
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] if NHWC data layout. Data type supported: Same as @p input.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size;
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_weights;
- PadStrideInfo _conv_info;
- unsigned int _num_elems_written_per_iteration;
- unsigned int _depth_multiplier;
- Size2D _dilation;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H */
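A minimal configuration sketch for the 3x3 kernel above, assuming F32 tensors allocated elsewhere; the SAME-padding, stride-1 PadStrideInfo is just one of the supported combinations:

#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void run_dwc3x3(Tensor &input, Tensor &weights, Tensor &output)
{
    // Stride 1 in x/y with 1 pixel of padding keeps the spatial size ("SAME").
    const PadStrideInfo conv_info(1, 1, 1, 1);
    NEDepthwiseConvolutionLayer3x3Kernel dwc;
    dwc.configure(&input, &weights, &output, conv_info, 1 /* depth_multiplier */);
    NEScheduler::get().schedule(&dwc, Window::DimY);
}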
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
deleted file mode 100644
index 9737c9932e..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/utils/misc/Requires.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_neon.h>
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to run a depthwise convolution native on a tensor. */
-class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseConvolutionLayerNativeKernel";
- }
- /** Default constructor */
- NEDepthwiseConvolutionLayerNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Default Move Constructor. */
- NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Default move assignment operator */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Initialize the function's source, destination and parameters.
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- template < typename T, typename TW, int S, typename std::enable_if < std::is_same<T, float>::value
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- || std::is_same<T, float16_t>::value
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- ,
- int >::type = 0 >
- void run_depthwise(const Window &window, bool has_biases);
-
- template < typename T, typename TW, int S, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
- void run_depthwise(const Window &window, bool has_biases);
-
- /** Common signature for all the specialised depthwise convolution native functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases);
-
- DepthwiseFunctionPtr _func;
- BorderSize _border_size;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_biases;
- ITensor *_output;
- PadStrideInfo _conv_info;
- unsigned int _depth_multiplier;
- Size2D _dilation;
- std::vector<int> _output_multiplier;
- std::vector<int> _output_shift;
- bool _has_biases;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
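Because this kernel is NHWC-only and the weights are laid out as [IFM, W, H], a validate() probe makes the contract concrete; everything below (shapes, no padding, F32 types) is an illustrative assumption:

#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

bool dwc_native_f32_ok()
{
    // NHWC shapes are written [C, W, H(, N)] in TensorShape order.
    TensorInfo in(TensorShape(8U, 16U, 16U), 1, DataType::F32);
    TensorInfo weights(TensorShape(8U, 3U, 3U), 1, DataType::F32); // [IFM, W, H]
    TensorInfo bias(TensorShape(8U), 1, DataType::F32);
    TensorInfo out(TensorShape(8U, 14U, 14U), 1, DataType::F32);   // 16 - 3 + 1 = 14
    in.set_data_layout(DataLayout::NHWC);
    weights.set_data_layout(DataLayout::NHWC);
    out.set_data_layout(DataLayout::NHWC);
    const Status s = NEDepthwiseConvolutionLayerNativeKernel::validate(
        &in, &weights, &bias, &out, PadStrideInfo(1, 1, 0, 0));
    return s.error_code() == ErrorCode::OK;
}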
diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
deleted file mode 100644
index 3792fb3bd7..0000000000
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the dequantization layer kernel. */
-class NEDequantizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDequantizationLayerKernel";
- }
- /** Default constructor */
- NEDequantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEDequantizationLayerKernel() = default;
- /** Set input, output tensors.
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[in] output Output tensor info. Data types supported: F16/F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */
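Per element, dequantization is just the inverse of the affine quantization mapping; a scalar reference for the QASYMM8 case (the kernel vectorises this, and per-channel variants use one scale/offset pair per channel):

#include <cstdint>

// real_value = scale * (quantized_value - zero_point)
float dequantize_qasymm8_value(uint8_t q, float scale, int32_t zero_point)
{
    return scale * static_cast<float>(static_cast<int32_t>(q) - zero_point);
}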
diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h
deleted file mode 100644
index 20aee9b5ce..0000000000
--- a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H
-#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run the derivative along the X/Y directions on a tensor.
- *
- */
-class NEDerivativeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDerivativeKernel";
- }
- /** Default constructor */
- NEDerivativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDerivativeKernel(const NEDerivativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDerivativeKernel(NEDerivativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default;
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Function to perform derivative along the X direction on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void derivative_x(const Window &window);
- /** Function to perform derivative along the Y direction on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void derivative_y(const Window &window);
- /** Function to perform derivative along the X and Y direction on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void derivative_xy(const Window &window);
- /** Common signature for all the specialised derivative functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window);
- /** Derivative function to use for the particular tensor types passed to configure() */
- DerivativeFunction _func;
-
-private:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< Output tensor - Derivative along the X direction */
- ITensor *_output_y; /**< Output tensor - Derivative along the Y direction */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */
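Either gradient output may be omitted, and configure() binds the matching member function (derivative_x, derivative_y or derivative_xy) that run() later dispatches through; an X-only setup might look like this (the names are illustrative):

#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void run_derivative_x(Tensor &src_u8, Tensor &grad_x_s16)
{
    NEDerivativeKernel deriv;
    // output_y is nullptr, so only the X code path is bound; the border has
    // been filled beforehand, hence border_undefined == false.
    deriv.configure(&src_u8, &grad_x_s16, nullptr, false);
    NEScheduler::get().schedule(&deriv, Window::DimY);
}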
diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h
deleted file mode 100644
index 00a954d958..0000000000
--- a/arm_compute/core/NEON/kernels/NEDilateKernel.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDILATEKERNEL_H
-#define ARM_COMPUTE_NEDILATEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform boolean image dilation */
-class NEDilateKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEDilateKernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */
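As a scalar reference for what the kernel computes per pixel (its NEON implementation and exact border semantics aside), 3x3 dilation on U8 data takes the neighbourhood maximum:

#include <algorithm>
#include <cstdint>

// Maximum of the 3x3 neighbourhood around (x, y); the caller guarantees the
// coordinates stay at least one pixel away from the image border.
uint8_t dilate3x3_at(const uint8_t *img, int stride, int x, int y)
{
    uint8_t result = 0;
    for(int dy = -1; dy <= 1; ++dy)
    {
        for(int dx = -1; dx <= 1; ++dx)
        {
            result = std::max(result, img[(y + dy) * stride + (x + dx)]);
        }
    }
    return result;
}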
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
deleted file mode 100644
index 4ae283d69d..0000000000
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON interface for Direct Convolution Layer kernel */
-class NEDirectConvolutionLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDirectConvolutionLayerKernel";
- }
- /** Default constructor */
- NEDirectConvolutionLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default;
- /** Default destructor */
- ~NEDirectConvolutionLayerKernel() = default;
- /** Set the input, weights, and output tensors.
- *
- * @note: DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported: Same as @p input.
- * @param[out] output Output tensor.
- * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported: Same as @p input.
- * @param[in] output Output tensor.
- * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input;
- const ITensor *_weights;
- ITensor *_output;
- PadStrideInfo _conv_info;
- BorderSize _border_size;
- unsigned int _kernel_size;
- unsigned int _num_weight_elems_read_per_row;
- unsigned int _num_elems_read_per_iteration;
- unsigned int _num_elems_written_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */
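Given the 1x1/3x3 and stride 1/2/3 restrictions noted above, validate() is the cheap way to reject unsupported geometries up front; the shapes below are illustrative:

#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

bool direct_conv_3x3_ok()
{
    const TensorInfo src(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 8U), 1, DataType::F32); // [kx, ky, IFM, OFM]
    const TensorInfo dst(TensorShape(30U, 30U, 8U), 1, DataType::F32);        // 32 - 3 + 1 = 30
    const Status s = NEDirectConvolutionLayerKernel::validate(&src, &weights, &dst, PadStrideInfo(1, 1, 0, 0));
    return s.error_code() == ErrorCode::OK;
}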
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
deleted file mode 100644
index b7632d70c4..0000000000
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
-#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
- *
- * @note We assume bias to be shared
- * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part
- * of the @ref DirectConvolutionLayerOutputStageKernelInfo.
- */
-class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDirectConvolutionLayerOutputStageKernel";
- }
- /** Default constructor */
- NEDirectConvolutionLayerOutputStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
- /** Default destructor */
- ~NEDirectConvolutionLayerOutputStageKernel() = default;
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: F16/F32/S32
- * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
- * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32
- * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata
- */
- void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr,
- const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
- *
- * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: F16/F32/S32
- * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
- * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32
- * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr,
- const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
- int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift);
-
-private:
- OutputStageKernel *_func;
- ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _result_offset_after_shift;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */
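For the quantized path, the stored fixed-point multiplier/shift/offset implement the usual S32 -> QASYMM8 requantization; a simplified scalar sketch (round-to-nearest and exact saturation details omitted):

#include <algorithm>
#include <cstdint>

// Scale an S32 accumulator (plus shared bias) back into the QASYMM8 range:
// multiply by a Q31 fixed-point multiplier, shift right, then re-centre.
uint8_t requantize_s32_to_qasymm8(int32_t acc, int32_t bias, int32_t multiplier, int shift, int32_t offset)
{
    const int64_t scaled  = (static_cast<int64_t>(acc + bias) * multiplier) >> (31 + shift);
    const int64_t shifted = scaled + offset;
    return static_cast<uint8_t>(std::min<int64_t>(255, std::max<int64_t>(0, shifted)));
}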
diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
deleted file mode 100644
index 61c25e1a2a..0000000000
--- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
- *
- */
-class NEElementwiseOperationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEElementwiseOperationKernel";
- }
- /** Default constructor */
- NEElementwiseOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default;
- /** Default destructor */
- ~NEElementwiseOperationKernel() = default;
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[out] output Output tensor. Data types supported: Dependent on subclass.
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
-
-protected:
- /** Validate the arguments passed to the kernel
- *
- * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Dependent on subclass.
- */
- static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-
- /** Common configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff)
- *
- */
- void configure_common(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- /** Function to use for the particular tensor types passed to configure() */
- std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)> _function;
-
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
-};
-
-class NEArithmeticOperationKernel : public NEElementwiseOperationKernel
-{
-public:
- /** Default constructor */
- NEArithmeticOperationKernel() = default;
-
- /** Configure the kernel's inputs and output
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(ArithmeticOperation op, const ITensor *input1, const ITensor *input2, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a Status
- */
- static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-
-class NEDivisionOperationKernel : public NEArithmeticOperationKernel
-{
-public:
- /** Default constructor */
- NEDivisionOperationKernel() = default;
-
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input1 First tensor input. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-
-class NEPowerOperationKernel : public NEArithmeticOperationKernel
-{
-public:
- /** Default constructor */
- NEPowerOperationKernel() = default;
-
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input1 First tensor input. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-
-class NEComparisonOperationKernel : public NEElementwiseOperationKernel
-{
-public:
- /** Default constructor */
- NEComparisonOperationKernel() = default;
-
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] op Comparison operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[out] output Output tensor. Data types supported: U16/U32.
- */
- void configure(ComparisonOperation op, const ITensor *input1, const ITensor *input2, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
- *
- * @param[in] op Comparison operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U16/U32.
- *
- * @return a Status
- */
- static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */
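For context on the removed interface: each subclass binds _function to one specialised OP, and run() applies it texel by texel as output(x,y) = OP(input1(x,y), input2(x,y)). A minimal scalar sketch of that dispatch in standalone C++ (illustrative only, not the removed NEON code; all names are invented):

#include <cstddef>
#include <functional>
#include <vector>

// Stand-in for the ElementwiseFunction dispatch: apply a binary OP
// element by element, out[i] = op(in1[i], in2[i]).
template <typename T>
void elementwise_binary(const std::vector<T> &in1, const std::vector<T> &in2,
                        std::vector<T> &out, std::function<T(T, T)> op)
{
    for(std::size_t i = 0; i < out.size(); ++i)
    {
        out[i] = op(in1[i], in2[i]);
    }
}

// Usage sketch: elementwise_binary<float>(a, b, c, [](float x, float y) { return x > y ? x : y; });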
diff --git a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h
deleted file mode 100644
index 9a41cecf19..0000000000
--- a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise unary operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x) = OP(input(x))@f]
- *
- */
-class NEElementwiseUnaryKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEElementwiseUnaryKernel";
- }
- /** Default constructor */
- NEElementwiseUnaryKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default;
- /** Default destructor */
- ~NEElementwiseUnaryKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] op Element-wise unary operation to be executed.
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ElementWiseUnary op, const ITensor *input, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel
- *
- * @param[in] op Element-wise unary operation to be executed.
- * @param[in] input Source tensor info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a Status
- */
- static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window);
-
- /** Template function to run elementwise unary operation
- *
- * @tparam ScalarType Scalar datatype
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename ScalarType>
- void elementwise_op(const Window &window);
-
- ElementwiseUnaryPtr _func;
- const ITensor *_input;
- ITensor *_output;
- ElementWiseUnary _op;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */
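The unary kernel follows the same pattern with a single input: _func points at a specialisation of elementwise_op<ScalarType> computing output(x) = OP(input(x)). A scalar sketch with rsqrt as the example OP (illustrative, not the removed implementation):

#include <cmath>
#include <cstddef>

// Scalar stand-in for elementwise_op<ScalarType>, here with OP = rsqrt;
// the real kernel selects OP from the ElementWiseUnary enumeration.
template <typename ScalarType>
void elementwise_unary_rsqrt(const ScalarType *input, ScalarType *output, std::size_t n)
{
    for(std::size_t i = 0; i < n; ++i)
    {
        output[i] = ScalarType(1) / std::sqrt(input[i]);
    }
}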
diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h
deleted file mode 100644
index e3fcc2847e..0000000000
--- a/arm_compute/core/NEON/kernels/NEErodeKernel.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEERODEKERNEL_H
-#define ARM_COMPUTE_NEERODEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform boolean image erosion */
-class NEErodeKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEErodeKernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEERODEKERNEL_H */
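Boolean erosion replaces each texel with the minimum of its 3x3 neighbourhood, so a foreground pixel survives only if its whole neighbourhood is foreground. A scalar sketch over the interior of a U8 image (border handling, controlled by border_undefined in the kernel, is omitted; names are invented):

#include <algorithm>
#include <cstdint>

// 3x3 erosion: dst(x,y) = min of the 3x3 neighbourhood of src(x,y).
void erode3x3(const uint8_t *src, uint8_t *dst, int width, int height)
{
    for(int y = 1; y < height - 1; ++y)
    {
        for(int x = 1; x < width - 1; ++x)
        {
            uint8_t m = 255;
            for(int dy = -1; dy <= 1; ++dy)
            {
                for(int dx = -1; dx <= 1; ++dx)
                {
                    m = std::min(m, src[(y + dy) * width + (x + dx)]);
                }
            }
            dst[y * width + x] = m;
        }
    }
}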
diff --git a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h b/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h
deleted file mode 100644
index ed17e3b8d5..0000000000
--- a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
-#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the digit reverse operation kernel. */
-class NEFFTDigitReverseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFFTDigitReverseKernel";
- }
- /** Constructor */
- NEFFTDigitReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete;
- /** Default Move Constructor. */
- NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default;
- /** Default move assignment operator */
- NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default;
- /** Default destructor */
- ~NEFFTDigitReverseKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
- * @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] config Kernel configuration.
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
- * @param[in] idx Digit reverse index tensor info. Data type supported: U32
- * @param[in] config Kernel configuration
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window);
-
- template <bool is_input_complex, bool is_conj>
- void digit_reverse_kernel_axis_0(const Window &window);
-
- template <bool is_input_complex, bool is_conj>
- void digit_reverse_kernel_axis_1(const Window &window);
-
- NEFFTDigitReverseKernelFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_idx;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */
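The digit-reverse stage only reorders data: the idx tensor holds the precomputed digit-reversed position of every element along the processed axis, so the kernel is essentially a gather. A sketch for a 1D complex buffer (illustrative; the real kernel also handles real inputs, conjugation and a second axis):

#include <complex>
#include <cstddef>
#include <cstdint>
#include <vector>

// Gather according to the precomputed digit-reverse index tensor:
// output[i] = input[idx[i]].
void digit_reverse(const std::vector<std::complex<float>> &input,
                   std::vector<std::complex<float>> &output,
                   const std::vector<uint32_t> &idx)
{
    for(std::size_t i = 0; i < idx.size(); ++i)
    {
        output[i] = input[idx[i]];
    }
}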
diff --git a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h b/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h
deleted file mode 100644
index 6e16fca0fb..0000000000
--- a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H
-#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <arm_neon.h>
-#include <set>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the FFT kernel. */
-class NEFFTRadixStageKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFFTRadixStageKernel";
- }
- /** Constructor */
- NEFFTRadixStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete;
- /** Default Move Constructor. */
- NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default;
- /** Default move assignment operator */
- NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default;
- /** Default destructor */
- ~NEFFTRadixStageKernel() = default;
- /** Set the input and output tensors.
- *
- * @note If the output tensor is nullptr, the FFT will be performed in-place
- *
- * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input.
- * @param[in] config FFT descriptor metadata.
- */
- void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input.
- * @param[in] config FFT descriptor metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
- /** Returns the radix values supported by the FFT kernel
- *
- * @return A set of supported radix values
- */
- static std::set<unsigned int> supported_radix();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- ITensor *_input;
- ITensor *_output;
- bool _run_in_place;
- unsigned int _Nx;
- unsigned int _axis;
- unsigned int _radix;
-
- void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config);
- void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config);
-
- using FFTFunctionPointerAxis0 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int)>;
- using FFTFunctionPointerAxis1 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int, unsigned int)>;
-
- FFTFunctionPointerAxis0 _func_0;
- FFTFunctionPointerAxis1 _func_1;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */
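Each radix stage combines groups of partial DFTs with twiddle factors; _Nx is the butterfly half-span of the current stage. A textbook radix-2 version of one stage on a digit-reversed 1D buffer (a sketch under that simplification; the removed kernel supports several radix values and two axes):

#include <cmath>
#include <complex>
#include <cstddef>
#include <vector>

// One radix-2 butterfly stage with half-span Nx; calling this with
// Nx = 1, 2, 4, ... over a digit-reversed buffer yields the forward DFT.
void radix2_stage(std::vector<std::complex<float>> &data, std::size_t Nx)
{
    const std::size_t span = 2 * Nx;
    for(std::size_t base = 0; base < data.size(); base += span)
    {
        for(std::size_t k = 0; k < Nx; ++k)
        {
            const float ang = -2.0f * 3.14159265358979f * static_cast<float>(k) / static_cast<float>(span);
            const std::complex<float> w(std::cos(ang), std::sin(ang)); // twiddle factor
            const std::complex<float> a = data[base + k];
            const std::complex<float> b = w * data[base + k + Nx];
            data[base + k]      = a + b;
            data[base + k + Nx] = a - b;
        }
    }
}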
diff --git a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h b/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h
deleted file mode 100644
index 72963fa56d..0000000000
--- a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H
-#define ARM_COMPUTE_NEFFTSCALEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the inverse fft scale kernel. */
-class NEFFTScaleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFFTScaleKernel";
- }
- /** Constructor */
- NEFFTScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTScaleKernel(const NEFFTScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete;
- /** Default Move Constructor. */
- NEFFTScaleKernel(NEFFTScaleKernel &&) = default;
- /** Default move assignment operator */
- NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default;
- /** Default destructor */
- ~NEFFTScaleKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[in] config Kernel configuration
- */
- void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[in] config Kernel configuration
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- ITensor *_input;
- ITensor *_output;
- float _scale;
- bool _run_in_place;
- bool _is_conj;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */
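The scale kernel is the normalisation pass of the inverse FFT: every complex value is multiplied by a scale factor (typically 1/N) and, when _is_conj is set, conjugated. A sketch with invented names:

#include <complex>
#include <vector>

// Inverse-FFT scale pass: multiply each value by scale and optionally
// conjugate, mirroring the _scale and _is_conj members.
void fft_scale(std::vector<std::complex<float>> &data, float scale, bool conjugate)
{
    for(auto &v : data)
    {
        v *= scale;
        if(conjugate)
        {
            v = std::conj(v);
        }
    }
}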
diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h
deleted file mode 100644
index c0196c711a..0000000000
--- a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H
-#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
- /** NEON kernel to perform FAST corner detection */
-class NEFastCornersKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFastCornersKernel";
- }
- /** Constructor */
- NEFastCornersKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFastCornersKernel(const NEFastCornersKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFastCornersKernel(NEFastCornersKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default;
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data type supported: U8.
- * @param[out] output Output image. Data type supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const IImage *_input; /**< source image */
- IImage *_output; /**< intermediate results */
- uint8_t _threshold; /**< threshold on difference between intensity of the central pixel and circle pixels */
- bool _non_max_suppression; /**< true if non-maxima suppression is applied in the next stage */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */
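FAST declares a pixel a corner when a long enough contiguous arc of the 16 pixels on the radius-3 Bresenham circle is uniformly brighter than p + threshold or darker than p - threshold. A per-pixel sketch of that segment test (a simplified illustration with a run length of 9; the caller must keep (x, y) at least 3 pixels from the border):

#include <cstdint>

// Segment test for one candidate pixel: look for a run of >= 9 contiguous
// circle points all brighter or all darker than the centre by more than
// the threshold. off[] holds the radius-3 Bresenham circle offsets.
bool is_fast_corner(const uint8_t *img, int stride, int x, int y, uint8_t threshold)
{
    static const int off[16][2] = { { 0, -3 }, { 1, -3 }, { 2, -2 }, { 3, -1 }, { 3, 0 }, { 3, 1 }, { 2, 2 }, { 1, 3 },
                                    { 0, 3 }, { -1, 3 }, { -2, 2 }, { -3, 1 }, { -3, 0 }, { -3, -1 }, { -2, -2 }, { -1, -3 } };
    const int p = img[y * stride + x];
    for(int sign = -1; sign <= 1; sign += 2) // darker run first, then brighter run
    {
        int run = 0;
        for(int i = 0; i < 32; ++i) // walk the circle twice to catch wrap-around runs
        {
            const int q = img[(y + off[i % 16][1]) * stride + (x + off[i % 16][0])];
            run = (sign * (q - p) > threshold) ? run + 1 : 0;
            if(run >= 9)
            {
                return true;
            }
        }
    }
    return false;
}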
diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h
deleted file mode 100644
index e45caec34b..0000000000
--- a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H
-#define ARM_COMPUTE_NEFILLARRAYKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */
-class NEFillArrayKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFillArrayKernel";
- }
- /** Default constructor */
- NEFillArrayKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillArrayKernel(const NEFillArrayKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFillArrayKernel(NEFillArrayKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default;
- /** Default destructor */
- ~NEFillArrayKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data type supported: U8.
- * @param[in] threshold Texels greater than or equal to the threshold will be added to the array.
- * @param[out] output Arrays of keypoints to store the results.
- */
- void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- const IImage *_input;
- IKeyPointArray *_output;
- uint8_t _threshold;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/
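The fill-array kernel is a plain scan that appends every texel at or above the threshold to the keypoint array; it is not parallelisable because all positions append to one array. A sketch (KeyPoint here is an invented stand-in for the library's keypoint type):

#include <cstdint>
#include <vector>

struct KeyPoint
{
    int x, y;
};

// Append every texel with intensity >= threshold to the output array.
void fill_array(const uint8_t *img, int width, int height, uint8_t threshold, std::vector<KeyPoint> &out)
{
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            if(img[y * width + x] >= threshold)
            {
                out.push_back({ x, y });
            }
        }
    }
}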
diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
deleted file mode 100644
index 0c852e8232..0000000000
--- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H
-#define ARM_COMPUTE_NEFILLBORDERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to fill borders */
-class NEFillBorderKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFillBorderKernel";
- }
- /** Default Constructor */
- NEFillBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillBorderKernel(const NEFillBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFillBorderKernel(NEFillBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default;
- /** Default destructor */
- ~NEFillBorderKernel() = default;
-
- /** Initialise the function.
- *
- * @note This kernel fills the borders within the XY-planes.
- *
- * @param[in,out] tensor Tensor to process. Data types supported: All.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- void fill_replicate_single_channel(const Window &window);
- void fill_constant_value_single_channel(const Window &window);
-
- ITensor *_tensor;
- BorderSize _border_size;
- BorderMode _mode;
- PixelValue _constant_border_value;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */
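Border filling writes the padding texels around the valid region either with copies of the nearest valid texel (REPLICATE) or with a fixed value (CONSTANT). A single-channel sketch of the replicate case, assuming a width x height plane whose valid region is inset by b on every side (invented names):

#include <algorithm>
#include <cstdint>

// Replicate-mode border fill: every texel outside the valid region
// [b, width-b) x [b, height-b) copies the nearest valid texel.
void fill_border_replicate(uint8_t *img, int width, int height, int b)
{
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            const int cx = std::min(std::max(x, b), width - 1 - b);
            const int cy = std::min(std::max(y, b), height - 1 - b);
            if(cx != x || cy != y)
            {
                img[y * width + x] = img[cy * width + cx];
            }
        }
    }
}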
diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h
deleted file mode 100644
index 9c1059e606..0000000000
--- a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H
-#define ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to fill the interior borders */
-class NEFillInnerBorderKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFillInnerBorderKernel";
- }
- /** Default constructor */
- NEFillInnerBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillInnerBorderKernel(const NEFillInnerBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillInnerBorderKernel &operator=(const NEFillInnerBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFillInnerBorderKernel(NEFillInnerBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFillInnerBorderKernel &operator=(NEFillInnerBorderKernel &&) = default;
- /** Default destructor */
- ~NEFillInnerBorderKernel() = default;
-
- /** Initialise the function.
- *
- * @note This kernel fills the borders within the XY-planes.
- *
- * @param[in,out] input Tensor to process. Data types supported: U8/S16/S32/F32.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, BorderSize border_size, const PixelValue &constant_border_value = PixelValue());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- void fill_value_single_channel(const Window &window);
-
- ITensor *_tensor;
- BorderSize _border_size;
- PixelValue _constant_border_value;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h
deleted file mode 100644
index ba2f99857f..0000000000
--- a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
-#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the flatten layer kernel. */
-class NEFlattenLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFlattenLayerKernel";
- }
- /** Default constructor */
- NEFlattenLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default;
- /** Default destructor */
- ~NEFlattenLayerKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input Input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel
- *
- * @param[in] input Input tensor info to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All
- * @param[in] output Output tensor info with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */
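For a dense, unpadded buffer, flattening [w, h, d, batches] into [w*h*d, batches] does not move any element, so the operation reduces to a copy; the kernel's real work is iterating correctly over padded or strided tensors. A sketch under the dense assumption:

#include <cstddef>
#include <cstring>

// Flatten a dense [w, h, d, batches] buffer into [w*h*d, batches]:
// with contiguous storage this is a straight copy.
void flatten(const float *input, float *output, std::size_t w, std::size_t h, std::size_t d, std::size_t batches)
{
    std::memcpy(output, input, w * h * d * batches * sizeof(float));
}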
diff --git a/arm_compute/core/NEON/kernels/NEFloorKernel.h b/arm_compute/core/NEON/kernels/NEFloorKernel.h
deleted file mode 100644
index 4cdd9f2ac0..0000000000
--- a/arm_compute/core/NEON/kernels/NEFloorKernel.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFLOORKERNEL_H
-#define ARM_COMPUTE_NEFLOORKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a floor operation */
-class NEFloorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEFloorKernel";
- }
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[out] output Destination tensor. Same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel
- *
- * @param[in] input Source tensor info. Data type supported: F16/F32.
- * @param[in] output Destination tensor info. Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h
deleted file mode 100644
index f598530d1e..0000000000
--- a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
- /** NEON kernel to fuse the batch normalization node into a preceding convolution node */
-class NEFuseBatchNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFuseBatchNormalizationKernel";
- }
- /** Default constructor */
- NEFuseBatchNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEFuseBatchNormalizationKernel() = default;
- /** Set the source, destination of the kernel
- *
- * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
- * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- */
- void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
- const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel
- *
- * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights
- * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input_weights;
- const ITensor *_input_bias;
- const ITensor *_bn_mean;
- const ITensor *_bn_var;
- const ITensor *_bn_gamma;
- const ITensor *_bn_beta;
- ITensor *_fused_weights;
- ITensor *_fused_bias;
- float _epsilon;
- bool _run_in_place_weights;
- bool _run_in_place_bias;
-
- using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias,
- const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window);
-
- FuseBatchNormFunction *_func;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */
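Fusing batch normalization into the preceding convolution folds the per-channel normalisation into the weights and bias: w' = w * gamma / sqrt(var + eps) and b' = (b - mean) * gamma / sqrt(var + eps) + beta. A per-channel sketch of that folding (illustrative layout: weights_per_channel contiguous values per output channel):

#include <cmath>
#include <cstddef>

// Fold batch-norm statistics into convolution weights and bias, using one
// scale factor s = gamma / sqrt(var + eps) per output channel.
void fuse_batch_norm(const float *w, const float *b, const float *mean, const float *var,
                     const float *beta, const float *gamma, float eps,
                     float *w_fused, float *b_fused,
                     std::size_t channels, std::size_t weights_per_channel)
{
    for(std::size_t c = 0; c < channels; ++c)
    {
        const float s = gamma[c] / std::sqrt(var[c] + eps);
        for(std::size_t i = 0; i < weights_per_channel; ++i)
        {
            w_fused[c * weights_per_channel + i] = w[c * weights_per_channel + i] * s;
        }
        b_fused[c] = (b[c] - mean[c]) * s + beta[c];
    }
}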
diff --git a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h b/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h
deleted file mode 100644
index 6aa8e250a4..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H
-#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Base class for GEMM NEON kernels implemented in Assembly. */
-class NEGEMMAssemblyBaseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMAssemblyBaseKernel";
- }
- /** Constructor */
- NEGEMMAssemblyBaseKernel()
- : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false)
- {
- }
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default;
-
- virtual ~NEGEMMAssemblyBaseKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * The computed function is C = a * AxB + b * C.
- *
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32
- * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0
- * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0.
- * @param[out] workspace Space for intermediate results.
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the accumulation.
- * @param[in] is_transposed_0 (Optional) True if @p input0 is transposed, false otherwise. (Defaults to false)
- * @param[in] is_transposed_1 (Optional) True if @p input1 is transposed, false otherwise. (Defaults to false)
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false)
- {
- internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1);
- }
-
-protected:
- virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) = 0;
-
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
- ITensor *_workspace;
- float _alpha;
- float _beta;
- bool _is_transposed_0;
- bool _is_transposed_1;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/
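The computed function C = alpha * (A x B) + beta * C has the following naive reference form, useful for checking kernels derived from this base (row-major, no transposition; an illustrative reference, not the assembly implementation):

#include <cstddef>

// Reference GEMM: C = alpha * A(MxK) * B(KxN) + beta * C(MxN), row-major.
void gemm_reference(const float *A, const float *B, float *C,
                    std::size_t M, std::size_t N, std::size_t K, float alpha, float beta)
{
    for(std::size_t m = 0; m < M; ++m)
    {
        for(std::size_t n = 0; n < N; ++n)
        {
            float acc = 0.f;
            for(std::size_t k = 0; k < K; ++k)
            {
                acc += A[m * K + k] * B[k * N + n];
            }
            C[m * N + n] = alpha * acc + beta * C[m * N + n];
        }
    }
}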
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
deleted file mode 100644
index b6e6beab53..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H
-#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to interleave the elements of a matrix
- *
- * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccccccccccc}
- * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
- * \end{array} \right)
- * @f]
- *
- * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
- */
-class NEGEMMInterleave4x4Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMInterleave4x4Kernel";
- }
- /** Constructor */
- NEGEMMInterleave4x4Kernel();
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the transpose functions
- *
- * @param[in] input An input tensor. Data types supported: All
- * @param[out] output The output tensor. Data type supported: same as @p input
- * @param[in] window Region on which to execute the kernel.
- */
- using GEMMInterleaveFunction = void(const ITensor *input, ITensor *output, const Window &window);
-
- GEMMInterleaveFunction *_func; /**< GEMM interleave function to use for the particular tensor types passed to configure() */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/
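
The 4x4 interleave documented above can be written as the following scalar sketch. The uint8_t element type and the multiple-of-4 dimensions are simplifying assumptions; the kernel itself supports all data types, and handling of sizes that are not multiples of 4 is omitted here.

```cpp
#include <cstdint>
#include <vector>

// Scalar sketch of the documented 4x4 interleave: the 16 values of each
// 4x4 input block end up contiguous, column by column, on one output row,
// matching a00 a10 a20 a30 a01 a11 a21 a31 ... a33 in the class comment.
std::vector<uint8_t> interleave_4x4(const std::vector<uint8_t> &in, int width, int height)
{
    std::vector<uint8_t> out(in.size());
    uint8_t *dst = out.data();
    for(int y = 0; y < height; y += 4)
    {
        for(int x = 0; x < width; x += 4)
        {
            for(int c = 0; c < 4; ++c)     // column within the block
            {
                for(int r = 0; r < 4; ++r) // row within the block
                {
                    *dst++ = in[(y + r) * width + (x + c)];
                }
            }
        }
    }
    return out; // logical shape: [height * 4, ceil(width / 4.0f)]
}
```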
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
deleted file mode 100644
index 8f47c5089d..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to multiply matrices
- *
- * @note @ref NEGEMMLowpMatrixMultiplyKernel is the low precision matrix product kernel.
- * This kernel performs the following computation:
- *
- * -# Convert the values of matrix A from int8 to int32
- * -# Convert the values of matrix B from int8 to int32
- * -# Compute the int32 matrix product A * B and store the result as int32
- *
- */
-class NEGEMMLowpMatrixMultiplyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpMatrixMultiplyKernel";
- }
- /** Constructor */
- NEGEMMLowpMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two
- * kernels change the layout of the original matrices to be more cache-friendly.
- *
- * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[in] output Output tensor info to store the result of matrix multiplication. Data type supported: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
- bool _slide_matrix_b;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/
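
A scalar model of the computation listed above, with the int8 to int32 widening made explicit. The plain row-major layout is an assumption of this sketch; the real kernel consumes the interleaved and transposed layouts produced by the reshape kernels.

```cpp
#include <cstdint>

// Scalar model of the low precision product: widen both operands to int32,
// then accumulate an int32 dot product per output element.
void gemmlowp_mm_reference(const int8_t *a, const int8_t *b, int32_t *c,
                           int m, int n, int k)
{
    for(int i = 0; i < m; ++i)
    {
        for(int j = 0; j < n; ++j)
        {
            int32_t acc = 0;
            for(int p = 0; p < k; ++p)
            {
                acc += static_cast<int32_t>(a[i * k + p]) * static_cast<int32_t>(b[p * n + j]);
            }
            c[i * n + j] = acc;
        }
    }
}
```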
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
deleted file mode 100644
index b069e4cfac..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
- * and adds to it the offset contribution of matrix A and matrix B in-place.
- *
- * The final result is:
- *
- * mm_result[i][k] = mm_result[i][k] +
- * (vector_sum_col[k] * a_offset) +
- * (vector_sum_row[i] * b_offset) +
- * (a_offset * b_offset * k)
- *
- */
-class NEGEMMLowpOffsetContributionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpOffsetContributionKernel";
- }
- /** Constructor */
- NEGEMMLowpOffsetContributionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- */
- void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel
- *
- * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_vector_sum_col;
- const ITensor *_vector_sum_row;
- ITensor *_mm_result;
- int32_t _a_offset;
- int32_t _b_offset;
- int32_t _k_offset;
- bool _slide_vector_sum_col;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */
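
The documented in-place update, written out as a scalar loop over a row-major S32 buffer (an illustrative layout assumption):

```cpp
#include <cstdint>

// Applies mm_result[i][j] += vector_sum_col[j] * a_offset
//                          + vector_sum_row[i] * b_offset
//                          + a_offset * b_offset * k       (in-place)
void offset_contribution_reference(int32_t *mm_result, const int32_t *vector_sum_col,
                                   const int32_t *vector_sum_row, int m, int n, int32_t k,
                                   int32_t a_offset, int32_t b_offset)
{
    const int32_t k_offset = a_offset * b_offset * k; // constant term, hoisted
    for(int i = 0; i < m; ++i)
    {
        for(int j = 0; j < n; ++j)
        {
            int32_t contrib = k_offset;
            if(a_offset != 0) contrib += vector_sum_col[j] * a_offset; // nullptr sum vector implies a_offset == 0
            if(b_offset != 0) contrib += vector_sum_row[i] * b_offset; // nullptr sum vector implies b_offset == 0
            mm_result[i * n + j] += contrib;
        }
    }
}
```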
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
deleted file mode 100644
index 0dc64c9842..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel.
- *
- * The computation is performed in-place
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
- * and adds to it the offset contribution of matrix A and matrix B in-place.
- *
- * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8.
- * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8.
- *
- * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is:
- *
- * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift
- *
- * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is:
- *
- * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * and mm_result'[i][k] = mm_result[i][k] +
- * (vector_sum_col[k] * a_offset) +
- * (vector_sum_row[i] * b_offset) +
- * (a_offset * b_offset * k)
- */
-
-class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpOffsetContributionOutputStageKernel";
- }
- /** Constructor */
- NEGEMMLowpOffsetContributionOutputStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionOutputStageKernel(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
- * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
- */
- void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel
- *
- * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
- * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset,
- int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- using NEGEMMLowpOffsetContributionOutputStageFunction = std::function<void(const Window, const ITensor *, const ITensor *, const ITensor *, const ITensor *,
- ITensor *, int32_t, int32_t, int32_t, bool, GEMMLowpOutputStageInfo)>;
-
-private:
- /** Function to use for the particular tensors passed to configure() */
- NEGEMMLowpOffsetContributionOutputStageFunction _function;
- const ITensor *_vector_sum_col;
- const ITensor *_vector_sum_row;
- const ITensor *_bias;
- const ITensor *_mm_result;
- ITensor *_output;
- int32_t _a_offset;
- int32_t _b_offset;
- int32_t _k_offset;
- bool _slide_vector_sum_col;
- GEMMLowpOutputStageInfo _output_stage;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */
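
FixedPointMul as defined above, the nearest integer to (x * y) / 2^31 evaluated without intermediate overflow, can be sketched with 64-bit arithmetic. Saturation of the INT32_MIN * INT32_MIN corner case is deliberately omitted in this sketch:

```cpp
#include <cstdint>

// Nearest integer to (x * y) / 2^31, computed in 64-bit so the product
// cannot overflow. The nudge makes the truncating division round half
// away from zero.
int32_t fixed_point_mul(int32_t x, int32_t y)
{
    const int64_t prod  = static_cast<int64_t>(x) * static_cast<int64_t>(y);
    const int64_t nudge = (prod >= 0) ? (1LL << 30) : (1 - (1LL << 30));
    return static_cast<int32_t>((prod + nudge) / (1LL << 31));
}
```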
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h
deleted file mode 100644
index b4a1419c9b..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values:
- *    - to the [0..255] range and cast to QASYMM8.
- *    - to the [-128..127] range and cast to QASYMM8_SIGNED.
- *
- */
-class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ScaleKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in]  output_stage GEMMLowp output stage metadata.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] output_stage GEMMLowp output stage metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- const GEMMLowpOutputStageInfo *_output_stage;
- bool _is_bounded_relu;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */
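
The per-element pipeline enumerated above, sketched for the QASYMM8 path. The parameter names mirror the GEMMLowpOutputStageInfo fields; folding the bias into the accumulator before scaling is an assumption of this sketch.

```cpp
#include <algorithm>
#include <cstdint>

// Scalar QASYMM8 path of the integer-scale output stage: add offset (and
// bias), multiply, shift, bounded-relu clamp, then clamp to [0, 255] and
// narrow to uint8.
uint8_t quantize_down_scale(int32_t acc, int32_t bias, int32_t result_offset,
                            int32_t result_mult_int, int32_t result_shift,
                            int32_t clamp_min, int32_t clamp_max)
{
    int32_t v = ((acc + bias + result_offset) * result_mult_int) >> result_shift;
    v = std::max<int32_t>(clamp_min, std::min<int32_t>(clamp_max, v)); // bounded relu
    v = std::max<int32_t>(0, std::min<int32_t>(255, v));               // QASYMM8 range
    return static_cast<uint8_t>(v);
}
```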
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
deleted file mode 100644
index 0806bd1df5..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16.
- *
- */
-class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QSYMM16
- * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix
- * @param[in]  result_shift                 Integer value used to round the result to the nearest division by a power-of-two after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor info. Data type supported: S32
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor info. Data type supported: QSYMM16
- * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
- * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool is_bounded_relu>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _min;
- int _max;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */
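
Combining the steps above with the fixed point multiply sketched earlier gives the following scalar QSYMM16 requantization. Treating min != max as the bounded-relu flag and adding the bias to the accumulator before scaling are assumptions of this sketch.

```cpp
#include <algorithm>
#include <cstdint>

// Nearest integer to (x * y) / 2^31 (see the fixed point multiply sketch above).
static int32_t fixed_point_mul(int32_t x, int32_t y)
{
    const int64_t prod  = static_cast<int64_t>(x) * static_cast<int64_t>(y);
    const int64_t nudge = (prod >= 0) ? (1LL << 30) : (1 - (1LL << 30));
    return static_cast<int32_t>((prod + nudge) / (1LL << 31));
}

// Round-to-nearest division by 2^exponent (rounds half away from zero).
static int32_t rounding_shift_right(int32_t x, int exponent)
{
    const int32_t mask      = (1 << exponent) - 1;
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
    return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Scalar QSYMM16 requantization: fixed point multiply, rounding shift,
// optional bounded relu, clamp to [-32768, 32767] and narrow to int16.
int16_t quantize_down_int16(int32_t acc, int32_t bias, int32_t multiplier,
                            int shift, int32_t min, int32_t max)
{
    int32_t v = fixed_point_mul(acc + bias, multiplier);
    v = rounding_shift_right(v, shift);
    if(min != max) v = std::max<int32_t>(min, std::min<int32_t>(max, v)); // bounded relu
    v = std::max<int32_t>(-32768, std::min<int32_t>(32767, v));
    return static_cast<int16_t>(v);
}
```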
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
deleted file mode 100644
index 2b3657c728..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
- *
- */
-class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix
- * @param[in]  result_shift                 Integer value used to round the result to the nearest division by a power-of-two after the fixed point multiplication
- * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
- * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
- *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
- * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
- *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool is_bounded_relu>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _result_offset_after_shift;
- int _min;
- int _max;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
deleted file mode 100644
index 2f099a3ebb..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
- *
- */
-class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output                       Output tensor. Data type supported: QASYMM8
- * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix
- * @param[in]  result_shift                 Integer value used to round the result to the nearest division by a power-of-two after the fixed point multiplication
- * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
- * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8
- * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
- *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: QASYMM8
- * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
- * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
- *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool is_bounded_relu>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _result_offset_after_shift;
- int _min;
- int _max;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */
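
Relative to the QSYMM16 sketch above, the QASYMM8 variant only adds the result_offset_after_shift term and an unsigned clamp; fixed_point_mul and rounding_shift_right are assumed to be the helpers defined in that sketch.

```cpp
#include <algorithm>
#include <cstdint>

// Assumed available from the previous sketch.
int32_t fixed_point_mul(int32_t x, int32_t y);
int32_t rounding_shift_right(int32_t x, int exponent);

// Scalar QASYMM8 requantization: as the QSYMM16 sketch, plus an offset
// applied after the shift and a [0, 255] clamp before narrowing to uint8.
uint8_t quantize_down_uint8(int32_t acc, int32_t bias, int32_t multiplier, int shift,
                            int32_t offset_after_shift, int32_t min, int32_t max)
{
    int32_t v = rounding_shift_right(fixed_point_mul(acc + bias, multiplier), shift);
    v += offset_after_shift;                                              // result_offset_after_shift
    if(min != max) v = std::max<int32_t>(min, std::min<int32_t>(max, v)); // bounded relu
    v = std::max<int32_t>(0, std::min<int32_t>(255, v));                  // QASYMM8 range
    return static_cast<uint8_t>(v);
}
```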
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h
deleted file mode 100644
index 1e472f5252..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-struct GEMMLowpReductionKernelInfo;
-
-/** Common interface for all NEON reduction kernels */
-class INEGEMMLowpReductionKernel : public INEKernel
-{
-public:
- /** Constructor */
- INEGEMMLowpReductionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete;
- /** Allow instances of this class to be moved */
- INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_by_scalar True if each reduced column/row must be multiplied by a scalar value.
- */
- virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
-
-protected:
- const ITensor *_input;
- ITensor *_output;
- int32_t _k;
- bool _is_reshaped;
- int32_t _scalar;
- bool _mul_by_scalar;
-};
-
-/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
-class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpMatrixAReductionKernel";
- }
- /** Initialise the kernel's input and output.
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_a_cols) Number of matrix A columns
- * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
- * - scalar Scalar value to multiply each reduced row by.
- * - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
- */
- void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_a_cols) Number of matrix A columns
- * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
- * - scalar Scalar value to multiply each reduced row by.
- * - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Execution of the reduction kernel specialized on the input type
- *
- * @param[in] window Execution window
- */
- template <typename T>
- void run_internal(const Window &window);
-};
-
-/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
-class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpMatrixBReductionKernel";
- }
- /** Initialise the kernel's input and output.
- *
- * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_b_rows) Number of matrix B rows.
- * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
- * - scalar Scalar value to multiply each reduced column by.
- * - mul_by_scalar True if each reduced column must be multiplied by a scalar value.
- */
- void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel
- *
- * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_b_rows) Number of matrix B rows.
- * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
- * - scalar Scalar value to multiply each reduced column by.
- * - mul_by_scalar True if each reduced column must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Execution of the reduction kernel specialized on the input type
- *
- * @param[in] window Execution window
- * @param[in] info Thread-related information
- */
- template <typename T>
- void run_internal(const Window &window, const ThreadInfo &info);
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */
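
The two reduction kernels above produce the sum vectors consumed by the offset contribution kernels. A scalar sketch for non-reshaped row-major inputs, including the optional scalar multiply described by the kernel metadata:

```cpp
#include <cstdint>

// Row sums of an M x K matrix A (what NEGEMMLowpMatrixAReductionKernel
// produces), with the optional per-sum scalar multiply.
void sum_rows(const int8_t *a, int32_t *vector_sum_row, int m, int k,
              bool mul_by_scalar, int32_t scalar)
{
    for(int i = 0; i < m; ++i)
    {
        int32_t sum = 0;
        for(int p = 0; p < k; ++p)
        {
            sum += a[i * k + p];
        }
        vector_sum_row[i] = mul_by_scalar ? sum * scalar : sum;
    }
}

// Column sums of a K x N matrix B (what NEGEMMLowpMatrixBReductionKernel
// produces).
void sum_cols(const int8_t *b, int32_t *vector_sum_col, int k, int n,
              bool mul_by_scalar, int32_t scalar)
{
    for(int j = 0; j < n; ++j)
    {
        int32_t sum = 0;
        for(int p = 0; p < k; ++p)
        {
            sum += b[p * n + j];
        }
        vector_sum_col[j] = mul_by_scalar ? sum * scalar : sum;
    }
}
```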
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
deleted file mode 100644
index a3ba57e4ab..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
-#define ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-/** NEON kernel to add a bias to each row of the input tensor */
-class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMMatrixAccumulateBiasesKernel";
- }
- /** Default constructor */
- NEGEMMMatrixAccumulateBiasesKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixAccumulateBiasesKernel &operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Default destructor */
- ~NEGEMMMatrixAccumulateBiasesKernel() = default;
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32
- * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data type supported: Same as @p accum
- */
- void configure(ITensor *accum, const ITensor *biases);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAccumulateBiasesKernel
- *
- * @param[in] accum The accumulate tensor to convert. Data type supported: F32
- * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data type supported: Same as @p accum
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *accum, const ITensorInfo *biases);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- ITensor *_accum;
- const ITensor *_biases;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H */
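
A minimal pre-removal usage sketch for this kernel; the helper name and scheduler split dimension are assumptions, while validate()/configure() match the signatures above.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void accumulate_biases(ITensor *accum, const ITensor *biases)
    {
        // validate() rejects non-F32 data or a biases tensor that is not 1D.
        ARM_COMPUTE_ERROR_THROW_ON(NEGEMMMatrixAccumulateBiasesKernel::validate(accum->info(), biases->info()));
        NEGEMMMatrixAccumulateBiasesKernel kernel;
        kernel.configure(accum, biases); // adds biases to every row of accum, in place
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }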
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
deleted file mode 100644
index e528c59d8f..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H
-#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
- *
- * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size
- *
- * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing GEMM result, we have:
- * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel
- * - MTX_1 = C
- */
-class NEGEMMMatrixAdditionKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMMatrixAdditionKernel";
- }
- /** Constructor */
- NEGEMMMatrixAdditionKernel();
- /** Prevent instances of this class from being copied */
- NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete;
- /** Prevent instances of this class from being copied */
- NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note The input and output tensor must have the same dimensions
- *
- * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32
- * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
- * @param[in] beta Weight of matrix C
- */
- void configure(const ITensor *input, ITensor *output, float beta);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel.
- *
- * @note The input and output tensor must have the same dimensions
- *
- * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32
- * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
- * @param[in] beta Weight of matrix C
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the matrix addition functions
- *
- * @param[in] input An input tensor. Data types supported: F16/F32
- * @param[out] output The output tensor. Data type supported: same as @p input
- * @param[in] window Region on which to execute the kernel.
- * @param[in] beta Weight of matrix C
- */
- using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta);
- /** Matrix addition function to use for the particular tensor types passed to configure() */
- MatrixAdditionFunction *_func;
- float _beta;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */
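
A short sketch of the GEMM finalization step this kernel implemented, following the formula in the class comment; the wrapper and the Window::DimY split are illustrative assumptions.

    #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void finalize_gemm(const ITensor *c, ITensor *output, float beta)
    {
        // output already holds MTX_0 = alpha * A * B; adding beta * C in place
        // yields the full GEMM result. The stage is only needed when beta != 0.
        if(beta == 0.f)
        {
            return;
        }
        NEGEMMMatrixAdditionKernel kernel;
        kernel.configure(c, output, beta);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }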
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
deleted file mode 100644
index 841e08d0ef..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
- *
- * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
- * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
- *
- */
-class NEGEMMMatrixMultiplyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMMatrixMultiplyKernel";
- }
- /** Constructor */
- NEGEMMMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
- * These two kernels change the layout of the original matrices to be more cache-friendly.
- *
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
- * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] alpha Weight of the matrix product
- * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If @p is_interleaved = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
- * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] alpha Weight of the matrix product
- * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If @p is_interleaved = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
- float _alpha;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/
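
Illustrative wiring of the reshaped matrix-multiply path described above; the wrapper, the assumption that callers pass already-reshaped tensors, and the split dimension are editorial, while the configure() signature is from the header.

    #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void multiply_reshaped(const ITensor *a_interleaved, const ITensor *b_transposed, ITensor *dst,
                           float alpha, const GEMMReshapeInfo &reshape_info)
    {
        // Inputs are assumed to be the outputs of NEGEMMInterleave4x4Kernel and
        // NEGEMMTranspose1xWKernel respectively, hence is_interleaved = true.
        NEGEMMMatrixMultiplyKernel kernel;
        kernel.configure(a_interleaved, b_transposed, dst, alpha, /* is_interleaved */ true, reshape_info);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }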
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
deleted file mode 100644
index f5635dd58c..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMMATRIXVECTORMULTIPLYKERNEL_H_
-#define ARM_COMPUTE_NEGEMMMATRIXVECTORMULTIPLYKERNEL_H_
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the GEMM matrix vector multiply kernel. **/
-class NEGEMMMatrixVectorMultiplyKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMMatrixVectorMultiplyKernel";
- }
- /** Default constructor */
- NEGEMMMatrixVectorMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixVectorMultiplyKernel(const NEGEMMMatrixVectorMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixVectorMultiplyKernel &operator=(const NEGEMMMatrixVectorMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixVectorMultiplyKernel(NEGEMMMatrixVectorMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixVectorMultiplyKernel &operator=(NEGEMMMatrixVectorMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 First input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] input1 Second input tensor. Data types supported: same as @p input0.
- * @param[out] output Output tensor which stores the result of the matrix-vector multiplication. Data type supported: same as @p input0; S32 for QASYMM8/QASYMM8_SIGNED inputs.
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixVectorMultiplyKernel
- *
- * @param[in] input0 First input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] input1 Second input tensor. Data types supported: same as @p input0.
- * @param[in] output Output tensor which stores the result of the matrix-vector multiplication. Data type supported: same as @p input0; S32 for QASYMM8/QASYMM8_SIGNED inputs.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Template function to run the matrix vector multiplication
- *
- * @tparam I0 Input 0 type
- * @tparam I1 Input 1 type
- * @tparam O Output type
- *
- * @param[in] window_in Input region. (Must be a valid region of the window returned by window()).
- * @param[in] window_w Weights region. (Must be a valid region of the window returned by window()).
- * @param[in] window_out Output region. (Must be a valid region of the window returned by window()).
- */
- template <typename I0, typename I1, typename O>
- void matrix_vector_multiply(const Window &window_in, const Window &window_w, const Window &window_out);
- /** Common signature for all the specialised matrix vector multiplication functions */
- using GEMMMatrixVectorMultiplyFunctionPtr = void (NEGEMMMatrixVectorMultiplyKernel::*)(const Window &window_in,
- const Window &window_w,
- const Window &window_out);
-
-private:
- GEMMMatrixVectorMultiplyFunctionPtr _func;
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMMATRIXVECTORMULTIPLYKERNEL_H_*/
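
A hedged usage sketch for the matrix-vector multiply kernel; the helper name and split dimension are assumptions, the validate()/configure() calls follow the header.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void matrix_vector_multiply(const ITensor *input0, const ITensor *input1, ITensor *output)
    {
        // For QASYMM8/QASYMM8_SIGNED inputs the output must be S32; F16/F32 outputs
        // keep the input data type.
        ARM_COMPUTE_ERROR_THROW_ON(NEGEMMMatrixVectorMultiplyKernel::validate(input0->info(), input1->info(), output->info()));
        NEGEMMMatrixVectorMultiplyKernel kernel;
        kernel.configure(input0, input1, output);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }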
diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
deleted file mode 100644
index 967a1b73dc..0000000000
--- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H
-#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
- *
- * The following is an example of how the transposition 1xW works when the input data type is F32
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccccccccccc}
- * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- *
- * The following is an example of how the transposition 1xW works when the input data type is F16
- *
- * @f[
- * \left( \begin{array}{cccccccc}
- * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\
- * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\
- * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\
- * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc}
- * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\
- * \end{array} \right)
- * @f]
- *
- * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
- *
- */
-class NEGEMMTranspose1xWKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMTranspose1xWKernel";
- }
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info. Data type supported: same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */
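
A minimal sketch of driving the 1xW transposition, restating the shape rule from the @note above; the wrapper and split dimension are assumptions.

    #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void transpose_1xw(const ITensor *src, ITensor *dst)
    {
        // For F32 (4-byte elements) W = 16 / 4 = 4, so a [width, height] input is
        // laid out as a [height * 4, ceil(width / 4)] output, as noted above.
        NEGEMMTranspose1xWKernel kernel;
        kernel.configure(src, dst);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }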
diff --git a/arm_compute/core/NEON/kernels/NEGatherKernel.h b/arm_compute/core/NEON/kernels/NEGatherKernel.h
deleted file mode 100644
index bfef40b53b..0000000000
--- a/arm_compute/core/NEON/kernels/NEGatherKernel.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEGATHERKERNEL_H
-#define ARM_COMPUTE_NEGATHERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel to perform the gather operation */
-class NEGatherKernel : public INEKernel
-{
-public:
- /** Default constructor. */
- NEGatherKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEGatherKernel(const NEGatherKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEGatherKernel &operator=(const NEGatherKernel &) = delete;
- /** Allow instances of this class to be moved. */
- NEGatherKernel(NEGatherKernel &&) = default;
- /** Allow instances of this class to be moved. */
- NEGatherKernel &operator=(NEGatherKernel &&) = default;
- /** Default destructor */
- ~NEGatherKernel() = default;
-
- /** Name of the kernel
- *
- * @return Kernel name
- */
- const char *name() const override
- {
- return "NEGatherKernel";
- }
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- */
- void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel
- *
- * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Implementation of the gather operation for 0 axis.
- *
- * For gather on the 0 axis, an element-by-element copy is performed.
- *
- * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
- * @param[in] info Info about executing thread and CPU.
- */
- template <typename U>
- void gather_0_axis(const Window &window, const ThreadInfo &info);
-
- /** Implementation of the gather operation.
- *
- * For axis >= 1, a row-wise copy is performed.
- *
- * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
- * @param[in] info Info about executing thread and CPU.
- */
- template <typename U>
- void gather_n_axis(const Window &window, const ThreadInfo &info);
-
- using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info);
-
- const ITensor *_input;
- const ITensor *_indices;
- int _axis;
- ITensor *_output;
- kernel_ptr _func;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */
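
A hedged sketch of gathering along a higher axis, matching the two code paths described in the private section; the helper name, the axis choice and the split dimension are assumptions.

    #include "arm_compute/core/NEON/kernels/NEGatherKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void gather_along_axis_1(const ITensor *src, const ITensor *indices, ITensor *dst)
    {
        // indices must be a 1D U32/S32 tensor whose values lie in
        // [0, src->info()->dimension(1)). axis = 0 takes the element-by-element
        // path; axis >= 1 takes the row-wise path.
        NEGatherKernel kernel;
        kernel.configure(src, indices, dst, /* axis */ 1);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }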
diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h
deleted file mode 100644
index fa92eef1b7..0000000000
--- a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H
-#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a Gaussian 3x3 filter */
-class NEGaussian3x3Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussian3x3Kernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: S16
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h
deleted file mode 100644
index 5e63e5136f..0000000000
--- a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H
-#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */
-class NEGaussian5x5HorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussian5x5HorKernel";
- }
- /** Default constructor */
- NEGaussian5x5HorKernel();
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size;
-};
-
-/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */
-class NEGaussian5x5VertKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussian5x5VertKernel";
- }
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: S16.
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */
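
A condensed sketch of the two-pass 5x5 Gaussian wiring; real callers also fill the border region between passes, which is omitted here, and the helper name and split dimension are assumptions.

    #include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void gaussian_5x5(const ITensor *src_u8, ITensor *tmp_s16, ITensor *dst_u8)
    {
        // Separable filter: the horizontal pass widens U8 to S16, the vertical
        // pass accumulates and narrows back to U8.
        NEGaussian5x5HorKernel  hor;
        NEGaussian5x5VertKernel vert;
        hor.configure(src_u8, tmp_s16, /* border_undefined */ true);
        vert.configure(tmp_s16, dst_u8, /* border_undefined */ true);
        NEScheduler::get().schedule(&hor, Window::DimY);
        NEScheduler::get().schedule(&vert, Window::DimY);
    }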
diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h
deleted file mode 100644
index 4700325b5f..0000000000
--- a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H
-#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a GaussianPyramid (horizontal pass) */
-class NEGaussianPyramidHorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussianPyramidHorKernel";
- }
- /** Default constructor */
- NEGaussianPyramidHorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default;
- /** Default destructor */
- ~NEGaussianPyramidHorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16.
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- int _l2_load_offset;
-};
-
-/** NEON kernel to perform a GaussianPyramid (vertical pass) */
-class NEGaussianPyramidVertKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussianPyramidVertKernel";
- }
- /** Default constructor */
- NEGaussianPyramidVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default;
- /** Default destructor */
- ~NEGaussianPyramidVertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: S16.
- * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8.
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- int _t2_load_offset;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */
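
A rough sketch of producing one pyramid level with the two kernels above; border handling and tensor allocation are omitted, and the helper name and split dimension are assumptions.

    #include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void next_pyramid_level(const ITensor *level_u8, ITensor *tmp_s16, ITensor *next_level_u8)
    {
        // The horizontal pass halves the width (U8 -> S16); the vertical pass
        // halves the height (S16 -> U8), yielding the next pyramid level.
        NEGaussianPyramidHorKernel  hor;
        NEGaussianPyramidVertKernel vert;
        hor.configure(level_u8, tmp_s16);
        vert.configure(tmp_s16, next_level_u8);
        NEScheduler::get().schedule(&hor, Window::DimY);
        NEScheduler::get().schedule(&vert, Window::DimY);
    }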
diff --git a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h
deleted file mode 100644
index 382ce54518..0000000000
--- a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
-#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for Compute All Anchors kernel */
-class NEComputeAllAnchorsKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEComputeAllAnchorsKernel";
- }
-
- /** Default constructor */
- NEComputeAllAnchorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default;
- /** Default destructor */
- ~NEComputeAllAnchorsKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel
- *
- * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- void internal_run(const Window &window);
-
- const ITensor *_anchors;
- ITensor *_all_anchors;
- ComputeAnchorsInfo _anchors_info;
-};
-} // namespace arm_compute
-#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
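
A hedged sketch of anchor expansion; the wrapper, the ComputeAnchorsInfo constructor argument order and the split dimension are assumptions, while configure()/validate() follow the header.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void compute_all_anchors(const ITensor *anchors, ITensor *all_anchors,
                             float feat_width, float feat_height, float spatial_scale)
    {
        // anchors is (4, A); all_anchors is expanded to (4, H*W*A) for the feature map.
        const ComputeAnchorsInfo info(feat_width, feat_height, spatial_scale);
        ARM_COMPUTE_ERROR_THROW_ON(NEComputeAllAnchorsKernel::validate(anchors->info(), all_anchors->info(), info));
        NEComputeAllAnchorsKernel kernel;
        kernel.configure(anchors, all_anchors, info);
        NEScheduler::get().schedule(&kernel, Window::DimY);
    }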
diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h
deleted file mode 100644
index edb2da58e2..0000000000
--- a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H
-#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H
-
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform HOG Orientation Binning */
-class NEHOGOrientationBinningKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHOGOrientationBinningKernel";
- }
- /** Default constructor */
- NEHOGOrientationBinningKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default;
- /** Default destructor */
- ~NEHOGOrientationBinningKernel() = default;
-
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised block normalization functions
- *
- * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor
- * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor
- * @param[out] output_ptr Pointer to the output cell of hog space tensor
- * @param[in] mag_stride Stride of the magnitude tensor
- * @param[in] phase_stride Stride of the phase tensor
- * @param[in] cell_width Width of the cell
- * @param[in] cell_height Height of the cell
- * @param[in] num_bins Number of bins for each cell
- * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index
- */
- using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width,
- size_t cell_height, size_t num_bins, float phase_scale);
- /** Orientation binning function to use for the particular cell width passed to configure() */
- OrientBinFunc *_func;
- const ITensor *_input_magnitude;
- const ITensor *_input_phase;
- ITensor *_output;
- size_t _cell_width;
- size_t _cell_height;
- size_t _num_bins;
- float _phase_scale;
-};
-
-/** NEON kernel to perform HOG block normalization */
-class NEHOGBlockNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHOGBlockNormalizationKernel";
- }
- /** Default constructor */
- NEHOGBlockNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEHOGBlockNormalizationKernel() = default;
-
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised block normalization functions
- *
- * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor
- * @param[out] output_ptr Pointer to the output block of the hog normalized space
- * @param[in] input_stride Stride of the input hog space tensor
- * @param[in] num_cells_per_block_height Number of cells per block along the Y direction
- * @param[in] num_bins_block_x Number of bins per block along the X direction
- * @param[in] num_bins_block Number of total bins per block
- * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization
- */
- using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block,
- float l2_hyst_threshold);
- /** Block normalization function to use for the particular normalization type passed to configure() */
- BlockNormFunc *_func;
- const ITensor *_input;
- ITensor *_output;
- Size2D _num_cells_per_block;
- Size2D _num_cells_per_block_stride;
- size_t _num_bins;
- float _l2_hyst_threshold;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */
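
An illustrative chaining of the two HOG descriptor stages above; the helper, the caller-provided HOGInfo and the split dimension are assumptions, while both configure() calls match the headers.

    #include "arm_compute/core/HOGInfo.h"
    #include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void build_hog_descriptor(const ITensor *mag_s16, const ITensor *phase_u8,
                              ITensor *hog_space, ITensor *norm_blocks, const HOGInfo *hog_info)
    {
        // Stage 1: per-cell orientation histograms from gradient magnitude (S16)
        // and phase (U8).
        NEHOGOrientationBinningKernel binning;
        binning.configure(mag_s16, phase_u8, hog_space, hog_info);
        // Stage 2: block normalization of the cell histograms.
        NEHOGBlockNormalizationKernel norm;
        norm.configure(hog_space, norm_blocks, hog_info);
        NEScheduler::get().schedule(&binning, Window::DimY);
        NEScheduler::get().schedule(&norm, Window::DimY);
    }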
diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h
deleted file mode 100644
index acb35923d4..0000000000
--- a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H
-#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform HOG detector kernel using linear SVM */
-class NEHOGDetectorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHOGDetectorKernel";
- }
- /** Default constructor */
- NEHOGDetectorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete;
- /** Default destructor */
- ~NEHOGDetectorKernel() = default;
-
- /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
- *
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- IDetectionWindowArray *_detection_windows;
- const float *_hog_descriptor;
- float _bias;
- float _threshold;
- uint16_t _idx_class;
- size_t _num_bins_per_descriptor_x;
- size_t _num_blocks_per_descriptor_y;
- size_t _block_stride_width;
- size_t _block_stride_height;
- size_t _detection_window_width;
- size_t _detection_window_height;
- size_t _max_num_detection_windows;
- arm_compute::Mutex _mutex;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */
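
A minimal sketch of running the SVM detector over a normalized descriptor; the helper name, the default threshold/class and the split dimension are assumptions, while the configure() signature is from the header.

    #include "arm_compute/core/IArray.h"
    #include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
    #include "arm_compute/core/Size2D.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void detect_objects(const ITensor *descriptor, const IHOG *hog,
                        IDetectionWindowArray *windows, const Size2D &window_stride)
    {
        // window_stride must be a multiple of hog->info()->block_stride() in both
        // dimensions; windows collects every detection whose SVM score passes the threshold.
        NEHOGDetectorKernel detector;
        detector.configure(descriptor, hog, windows, window_stride, /* threshold */ 0.0f, /* idx_class */ 0);
        NEScheduler::get().schedule(&detector, Window::DimY);
    }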
diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
deleted file mode 100644
index a77fe16ac2..0000000000
--- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H
-#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H
-
-#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Common interface for all Harris Score kernels */
-class INEHarrisScoreKernel : public INEKernel
-{
-public:
- /** Default constructor */
- INEHarrisScoreKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete;
- /** Allow instances of this class to be moved */
- INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default;
- /** Default destructor */
- ~INEHarrisScoreKernel() = default;
-
-public:
- /** Setup the kernel parameters
- *
- * @param[in] input1 Source image (gradient X). Data types supported: S16/S32
- * @param[in]  input2           Source image (gradient Y). Data types supported: same as @p input1
- * @param[out] output           Destination image (Harris score). Data types supported: F32
- * @param[in]  norm_factor      Normalization factor to use according to the gradient size (must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0;
-
-protected:
- const IImage *_input1; /**< Source image - Gx component */
- const IImage *_input2; /**< Source image - Gy component */
- IImage       *_output;          /**< Destination image - Harris score */
- float _sensitivity; /**< Sensitivity value */
- float _strength_thresh; /**< Threshold value */
- float _norm_factor; /**< Normalization factor */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Template NEON kernel to perform Harris Score.
- * The implementation supports block_size values of 3, 5, and 7
- */
-template <int32_t block_size>
-class NEHarrisScoreKernel : public INEHarrisScoreKernel
-{
-public:
- const char *name() const override
- {
- return "NEHarrisScoreKernel";
- }
- /** Default constructor */
- NEHarrisScoreKernel();
- // Inherited methods overridden:
- void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override;
- BorderSize border_size() const override;
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised harris score functions */
- using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
- float norm_factor, float sensitivity, float strength_thresh);
- /** Harris Score function to use for the particular image types passed to configure() */
- HarrisScoreFunction *_func;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */
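
The removed Harris score kernels evaluate the Harris-Stephens corner response from the block-accumulated gradient products. A minimal scalar sketch of that response (names such as gx2, gy2 and gxgy are illustrative, standing for the normalized, windowed sums of Gx*Gx, Gy*Gy and Gx*Gy):

// Harris-Stephens response for one pixel: score = det(M) - k * trace(M)^2,
// where M is the 2x2 structure tensor of accumulated gradient products.
static float harris_response(float gx2, float gy2, float gxgy,
                             float sensitivity, float strength_thresh)
{
    const float det   = gx2 * gy2 - gxgy * gxgy; // det(M)
    const float trace = gx2 + gy2;               // trace(M)
    const float score = det - sensitivity * trace * trace;
    return (score > strength_thresh) ? score : 0.0f; // suppress weak responses
}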
diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h
deleted file mode 100644
index be81f2e963..0000000000
--- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the height concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEHeightConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHeightConcatenateLayerKernel";
- }
- /** Default constructor */
- NEHeightConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEHeightConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ITensor *input, unsigned int height_offset, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _height_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */
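
As a usage illustration only (tensor allocation omitted; `src` and `dst` are assumed, already-configured Tensor objects), the validate-then-configure pattern documented above might look like:

// Hypothetical call sequence for copying src into dst starting at row 4.
NEHeightConcatenateLayerKernel concat_kernel;
const unsigned int height_offset = 4;
ARM_COMPUTE_ERROR_THROW_ON(
    NEHeightConcatenateLayerKernel::validate(src.info(), height_offset, dst.info()));
concat_kernel.configure(&src, height_offset, &dst);
NEScheduler::get().schedule(&concat_kernel, Window::DimY);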
diff --git a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h
deleted file mode 100644
index b1dd105676..0000000000
--- a/arm_compute/core/NEON/kernels/NEHistogramKernel.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H
-#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-class IDistribution1D;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the histogram kernel */
-class NEHistogramKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHistogramKernel";
- }
- /** Default constructor */
- NEHistogramKernel();
- /** Default destructor */
- ~NEHistogramKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHistogramKernel(const NEHistogramKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHistogramKernel &operator=(const NEHistogramKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHistogramKernel(NEHistogramKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHistogramKernel &operator=(NEHistogramKernel &&) = delete;
-
- /** Set the input image and the distribution output.
- *
- * @param[in] input Source image. Data type supported: U8.
- * @param[out] output Destination distribution.
- * @param[in,out] local_hist Array that the threads use to save their local histograms.
- *                           Its size should be equal to (number_of_threads * num_bins),
- * and the Window::thread_id() is used to determine the part of the array
- * used by each thread.
- * @param[out] window_lut LUT with pre-calculated possible window values.
- * The size of the LUT should be equal to max_range_size and it will be filled
- *                           during the configure stage, and re-used in every run; it can therefore be
- * safely shared among threads.
- */
- void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut);
- /** Set the input image and the distribution output.
- *
- * @note Used for histogram of fixed size equal to 256
- *
- * @param[in] input Source image. Data type supported: U8.
- * @param[out] output Destination distribution, which must have 256 bins.
- */
- void configure(const IImage *input, IDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to merge multiple partial histograms.
- *
- * @param[out] global_hist Pointer to the final histogram.
- * @param[in] local_hist Pointer to the partial histograms.
- * @param[in] bins Number of bins.
- */
- void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins);
- /** Function to merge multiple minimum values of partial histograms.
- *
- * @param[out] global_min Pointer to the global min value.
- * @param[in] local_min Local min value.
- */
- void merge_min(uint8_t *global_min, const uint8_t &local_min);
- /** Function to perform histogram on the given window
- *
- * @param[in] win Region on which to execute the kernel
- * @param[in] info Info about the executing thread
- */
- void histogram_U8(Window win, const ThreadInfo &info);
- /** Function to perform histogram on the given window where histogram is
- * of fixed size 256 without ranges and offsets.
- *
- * @param[in] win Region on which to execute the kernel
- * @param[in] info Info about the executing thread
- */
- void histogram_fixed_U8(Window win, const ThreadInfo &info);
- /** Pre-calculate the pixel windowing for every possible pixel
- *
- * Calculate (V - offset) * numBins / range where V is every possible pixel value.
- *
- * @note We currently support U8 images, thus possible pixel values are between 0 and 255
- */
- void calculate_window_lut() const;
- /** Common signature for all the specialised Histogram functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info);
-
- HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure()
- const IImage *_input;
- IDistribution1D *_output;
- uint32_t *_local_hist;
- uint32_t *_window_lut;
- arm_compute::Mutex _hist_mtx;
- static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */
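
The thread-local histogram scheme described in the configure() documentation reduces to a guarded accumulation; a sketch of the merge step, mirroring merge_histogram above (names illustrative):

#include <cstddef>
#include <cstdint>
#include <mutex>

// Reduce one thread's partial histogram into the global one under a mutex.
void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist,
                     size_t bins, std::mutex &hist_mtx)
{
    std::lock_guard<std::mutex> lock(hist_mtx);
    for(size_t i = 0; i < bins; ++i)
    {
        global_hist[i] += local_hist[i];
    }
}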
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
deleted file mode 100644
index 1c358b379d..0000000000
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H
-#define ARM_COMPUTE_NEIM2COLKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-class Size2D;
-
-/** Interface for the im2col reshape kernel.
- *
- * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column.
- * It is used to transform a convolution to a plain matrix multiplication.
- *
- * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have:
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccc}
- * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
- * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
- * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
- * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- */
-class NEIm2ColKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEIm2ColKernel";
- }
- /** Default constructor */
- NEIm2ColKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIm2ColKernel(const NEIm2ColKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEIm2ColKernel(NEIm2ColKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default;
- /** Default destructor */
- ~NEIm2ColKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8 works only for has_bias = false
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in]  has_bias    In case biases are provided, expands the matrix with 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8 works only for has_bias = false
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias    In case biases are provided, expands the matrix with 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run im2col
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, bool has_pads, bool is_nchw>
- void run_im2col(const Window &window);
-
- /** Common signature for all the specialised im2col functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window);
-
- Im2ColFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- std::pair<unsigned int, unsigned int> _convolved_dims;
- PadStrideInfo _conv_info;
- unsigned int _kernel_width;
- unsigned int _kernel_height;
- bool _has_bias;
- Size2D _dilation;
- DataLayout _data_layout;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */
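
A naive single-channel reference for the rearrangement shown in the matrix example above (no padding, no dilation, one channel); the real kernel is specialised per data type and layout, so this is only a sketch:

#include <vector>

// Copy every kw x kh block (stepped by `stride`) of a w x h image into one
// row of the output, reproducing the 4x4 / 3x3 example in the class comment.
std::vector<float> im2col_reference(const float *src, int w, int h,
                                    int kw, int kh, int stride)
{
    std::vector<float> dst;
    for(int y = 0; y + kh <= h; y += stride)
    {
        for(int x = 0; x + kw <= w; x += stride)
        {
            for(int ky = 0; ky < kh; ++ky)     // one convolution block ...
            {
                for(int kx = 0; kx < kw; ++kx) // ... becomes one output row
                {
                    dst.push_back(src[(y + ky) * w + (x + kx)]);
                }
            }
        }
    }
    return dst;
}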
diff --git a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
deleted file mode 100644
index 7c14e409c6..0000000000
--- a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for performing an instance normalization */
-class NEInstanceNormalizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEInstanceNormalizationLayerKernel";
- }
- /** Default constructor */
- NEInstanceNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEInstanceNormalizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW
- * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
- * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
- * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
- */
- void configure(ITensor *input, ITensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
- * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
- * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
- * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialized instance normalization functions
- *
- * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
- * @param[out] output The output tensor.
- * @param[in]      gamma   The scale scalar value applied to the normalized tensor.
- * @param[in]      beta    The offset scalar value applied to the normalized tensor.
- * @param[in]      epsilon Lower bound value for the normalization.
- */
- using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window);
-
- NormalizationFunction *_func;
- ITensor *_input;
- ITensor *_output;
- float _gamma;
- float _beta;
- float _epsilon;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */
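
In scalar form, the normalization configured above applies out = gamma * (x - mean) / sqrt(var + epsilon) + beta over each (H, W) plane of a batch item. A sketch for one plane of n elements (illustrative, not the vectorised implementation):

#include <cmath>
#include <cstddef>

// Normalize one plane: the statistics are computed over the plane itself.
void instance_norm_plane(const float *in, float *out, size_t n,
                         float gamma, float beta, float epsilon)
{
    float sum = 0.0f, sum_sq = 0.0f;
    for(size_t i = 0; i < n; ++i)
    {
        sum    += in[i];
        sum_sq += in[i] * in[i];
    }
    const float mean    = sum / n;
    const float var     = sum_sq / n - mean * mean; // E[x^2] - E[x]^2
    const float inv_std = 1.0f / std::sqrt(var + epsilon);
    for(size_t i = 0; i < n; ++i)
    {
        out[i] = gamma * (in[i] - mean) * inv_std + beta;
    }
}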
diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h
deleted file mode 100644
index 77ae7b9efa..0000000000
--- a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H
-#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
- /** Kernel to compute the integral image of an input image */
-class NEIntegralImageKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEIntegralImageKernel";
- }
- /** Set the source and destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U32
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
- bool is_parallelisable() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */
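
For reference, the U8 -> U32 transform this kernel performs: each output pixel holds the sum of all input pixels above and to the left of it, inclusive. A scalar sketch:

#include <cstdint>

// Single-pass integral image using a running row sum plus the row above.
void integral_image(const uint8_t *src, uint32_t *dst, int w, int h)
{
    for(int y = 0; y < h; ++y)
    {
        uint32_t row_sum = 0;
        for(int x = 0; x < w; ++x)
        {
            row_sum += src[y * w + x];
            dst[y * w + x] = row_sum + ((y > 0) ? dst[(y - 1) * w + x] : 0u);
        }
    }
}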
diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
deleted file mode 100644
index 3937bf0163..0000000000
--- a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H
-#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
- /** Interface for performing an L2 normalization on a given axis, given the square sum along that axis */
-class NEL2NormalizeLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEL2NormalizeLayerKernel";
- }
- /** Default constructor */
- NEL2NormalizeLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default;
- /** Default destructor */
- ~NEL2NormalizeLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[in] sum Sum values tensor. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in]  axis    Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis: 2
- * @param[in] epsilon Lower bound value for the normalization.
- */
- void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32.
- * @param[in] sum Sum values tensor info. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis    Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis: 2
- * @param[in] epsilon Lower bound value for the normalization.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- const ITensor *_sum;
- ITensor *_output;
- unsigned int _actual_axis;
- float _epsilon;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */
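
Given the pre-computed square sum, the per-element work is a single scale. A scalar sketch, assuming epsilon acts as a lower bound on the square sum (the exact clamping point is an assumption of this sketch):

#include <algorithm>
#include <cmath>
#include <cstddef>

// Scale one row by 1 / sqrt(max(square_sum, epsilon)).
void l2_normalize_row(const float *in, float *out, size_t n,
                      float square_sum, float epsilon)
{
    const float scale = 1.0f / std::sqrt(std::max(square_sum, epsilon));
    for(size_t i = 0; i < n; ++i)
    {
        out[i] = in[i] * scale;
    }
}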
diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h
deleted file mode 100644
index cf99bbe691..0000000000
--- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H
-#define ARM_COMPUTE_LKTRACKERKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <tuple>
-#include <utility>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Internal keypoint class for Lucas-Kanade Optical Flow */
-struct NELKInternalKeypoint
-{
- float x{ 0.f }; /**< x coordinate of the keypoint */
- float y{ 0.f }; /**< y coordinate of the keypoint */
- bool tracking_status{ false }; /**< the tracking status of the keypoint */
-};
-
-/** Interface for NEON Array of Internal Key Points. */
-using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>;
-
-/** Interface for the Lucas-Kanade tracker kernel */
-class NELKTrackerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NELKTrackerKernel";
- }
- /** Default constructor */
- NELKTrackerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELKTrackerKernel(const NELKTrackerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NELKTrackerKernel(NELKTrackerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default;
- /** Default destructor */
- ~NELKTrackerKernel() = default;
-
- /** Initialise the kernel input and output
- *
- * @param[in] input_old Pointer to the input old tensor. Data type supported: U8
- * @param[in]      input_new            Pointer to the input new tensor. Data type supported: U8
- * @param[in]      old_scharr_gx        Pointer to the input Scharr X tensor. Data type supported: S16
- * @param[in]      old_scharr_gy        Pointer to the input Scharr Y tensor. Data type supported: S16
- * @param[in] old_points Pointer to the IKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points
- * @param[out] new_points Pointer to the IKeyPointArray storing new key points
- * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points
- * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in]  num_iterations       The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- * @param[in] num_levels The number of pyramid levels
- * @param[in] pyramid_scale Scale factor used for generating the pyramid
- */
- void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy,
- const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points,
- INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal,
- Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension,
- size_t level, size_t num_levels, float pyramid_scale);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Initialise the array of keypoints in the provided range
- *
- * @param[in] start Index of the first element in the keypoints array to be initialised
- * @param[in] end   Index after the last element in the keypoints array to be initialised
- */
- void init_keypoints(int start, int end);
- /** Compute the structure tensor A^T * A based on the Scharr gradients I_x and I_y
- *
- * @param[in] keypoint Keypoint for which gradients are computed
- * @param[out] bilinear_ix Intermediate interpolated data for X gradient
- * @param[out] bilinear_iy Intermediate interpolated data for Y gradient
- *
- * @return Values A11, A12, A22
- */
- std::tuple<int, int, int> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy);
- /** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y}
- *
- * @param[in] old_keypoint Old keypoint for which gradient is computed
- * @param[in] new_keypoint New keypoint for which gradient is computed
- * @param[in] bilinear_ix Intermediate interpolated data for X gradient
- * @param[in] bilinear_iy Intermediate interpolated data for Y gradient
- *
- * @return Values b1, b2
- */
- std::pair<int, int> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy);
-
- const ITensor *_input_old;
- const ITensor *_input_new;
- const ITensor *_old_scharr_gx;
- const ITensor *_old_scharr_gy;
- IKeyPointArray *_new_points;
- const IKeyPointArray *_new_points_estimates;
- const IKeyPointArray *_old_points;
- INELKInternalKeypointArray *_old_points_internal;
- INELKInternalKeypointArray *_new_points_internal;
- Termination _termination;
- bool _use_initial_estimate;
- float _pyramid_scale;
- float _epsilon;
- unsigned int _num_iterations;
- int _window_dimension;
- unsigned int _level;
- unsigned int _num_levels;
- ValidRegion _valid_region;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_LKTRACKERKERNEL_H */
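
The two private helpers above supply the 2x2 system A^T A d = A^T b of classic Lucas-Kanade, so one iteration reduces to an analytic 2x2 solve. A float sketch (the kernel itself works on the integer values returned by the helpers):

// Solve [a11 a12; a12 a22] * (dx, dy) = (b1, b2) by direct inversion.
// Returns false for a singular structure tensor (untrackable point).
bool lk_solve(float a11, float a12, float a22, float b1, float b2,
              float &dx, float &dy)
{
    const float det = a11 * a22 - a12 * a12;
    if(det == 0.0f)
    {
        return false;
    }
    dx = (a22 * b1 - a12 * b2) / det;
    dy = (a11 * b2 - a12 * b1) / det;
    return true;
}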
diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h
deleted file mode 100644
index ad2a161296..0000000000
--- a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
- /** NEON kernel to multiply each row of the first tensor with the lowest 2 dimensions of the second tensor. */
-class NELocallyConnectedMatrixMultiplyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NELocallyConnectedMatrixMultiplyKernel";
- }
- /** Default constructor */
- NELocallyConnectedMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output
- *
- * @param[in] input0 First input tensor. Data types supported: F16, F32
- * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel
- *
- * @param[in] input0 First input tensor info. Data types supported: F16, F32
- * @param[in] input1 Second input tensor info. Data type supported: same as @p input0
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */
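
Unlike a plain GEMM, every output row here has its own weight matrix: each row of input0 is a vector multiplied with the corresponding 2D slice of input1. A scalar sketch of one such product (names illustrative):

#include <cstddef>

// One locally connected product: out[j] = sum_i vec[i] * mat[i][j],
// with a distinct `mat` per output location.
void vec_mat_multiply(const float *vec, const float *mat, float *out,
                      size_t k, size_t n)
{
    for(size_t j = 0; j < n; ++j)
    {
        float acc = 0.0f;
        for(size_t i = 0; i < k; ++i)
        {
            acc += vec[i] * mat[i * n + j];
        }
        out[j] = acc;
    }
}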
diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
deleted file mode 100644
index 7ad5bf0d99..0000000000
--- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H
-#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Template interface for the kernel to compute magnitude and phase */
-template <MagnitudeType mag_type, PhaseType phase_type>
-class NEMagnitudePhaseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMagnitudePhaseKernel";
- }
- /** Default constructor */
- NEMagnitudePhaseKernel();
- /** Destructor */
- ~NEMagnitudePhaseKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete;
- /** Default move constructor */
- NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete;
- /** Default move assignment operator */
- NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default;
-
- /** Initialise the kernel's input, output.
- *
- * @note At least one of magnitude or phase must be set
- *
- * @param[in] gx Gradient X tensor. Data type supported: S16.
- * @param[in] gy Gradient Y tensor. Data type supported: S16.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16.
- * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8.
- */
- void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to perform magnitude on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void magnitude(const Window &window);
- /** Function to perform phase on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void phase(const Window &window);
- /** Function to perform magnitude and phase on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void magnitude_phase(const Window &window);
-
-private:
- /** Common signature for all the specialised MagnitudePhase functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window);
- /** MagnitudePhase function to use for the particular formats passed to configure() */
- MagnitudePhaseFunctionPtr _func;
- const ITensor *_gx; /**< Input gradient X */
- const ITensor *_gy; /**< Input gradient Y */
- ITensor *_magnitude; /**< Output - Magnitude */
- ITensor *_phase; /**< Output - Phase */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */
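
Per pixel, the templated kernel reduces to an L2 (or L1) magnitude and an angle. A scalar sketch of the L2 variant; the mapping of the angle onto the U8 phase output is an assumption of this sketch:

#include <algorithm>
#include <cmath>
#include <cstdint>

// L2 magnitude (saturated to S16) and angle quantised from [0, 360) to U8.
void magnitude_phase_pixel(int16_t gx, int16_t gy, int16_t &mag, uint8_t &phase)
{
    const float fgx = static_cast<float>(gx);
    const float fgy = static_cast<float>(gy);
    mag = static_cast<int16_t>(std::min(std::sqrt(fgx * fgx + fgy * fgy), 32767.0f));

    float angle = std::atan2(fgy, fgx) * 180.0f / 3.14159265f; // degrees
    if(angle < 0.0f)
    {
        angle += 360.0f; // wrap to [0, 360)
    }
    phase = static_cast<uint8_t>(angle * 256.0f / 360.0f);
}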
diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h
deleted file mode 100644
index 2197e3cfbe..0000000000
--- a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H
-#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
-class NEMeanStdDevKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMeanStdDevKernel";
- }
- /** Default constructor */
- NEMeanStdDevKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete;
- /** Default destructor */
- ~NEMeanStdDevKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input image. Data type supported: U8.
- * @param[out] mean               Output average pixel value.
- * @param[out] global_sum Keeps global sum of pixel values.
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
- */
- void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- BorderSize border_size() const override;
-
-private:
- const IImage *_input;
- float *_mean;
- float *_stddev;
- uint64_t *_global_sum;
- uint64_t *_global_sum_squared;
- arm_compute::Mutex _mtx;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */
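
The kernel only accumulates; turning the two global sums into the final statistics is a closed-form step. A sketch (population standard deviation, which is what the accumulator layout implies):

#include <cmath>
#include <cstdint>

// mean = sum / n; stddev = sqrt(E[x^2] - E[x]^2) from the global accumulators.
void finalize_mean_stddev(uint64_t sum, uint64_t sum_squared, uint64_t n,
                          float &mean, float &stddev)
{
    mean   = static_cast<float>(sum) / static_cast<float>(n);
    stddev = std::sqrt(static_cast<float>(sum_squared) / static_cast<float>(n)
                       - mean * mean);
}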
diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h
deleted file mode 100644
index dc0455cc4c..0000000000
--- a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_fp16.h>
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */
-class NEMeanStdDevNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMeanStdDevNormalizationKernel";
- }
- /** Default constructor */
- NEMeanStdDevNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEMeanStdDevNormalizationKernel() = default;
- /** Initialise the kernel's input and outputs.
- *
- * @note If the output tensor is a nullptr, the normalization will be performed in-place.
- *
- * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- */
- void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f);
- /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel
- *
- * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Normalizes the input with respect to mean and standard deviation.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename ScalarType, int size>
- void mean_stddev_normalization(const Window &window);
-
- ITensor *_input;
- ITensor *_output;
- float _epsilon;
-
- using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window);
-
- MeanStdDevNormFunction _func;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */
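
A hypothetical illustration of the in-place path noted above (`src` is an assumed, allocated 2D Tensor); leaving the output unset makes the kernel overwrite its input row by row:

// Hypothetical call sequence for in-place row-wise normalization of src.
NEMeanStdDevNormalizationKernel norm_kernel;
ARM_COMPUTE_ERROR_THROW_ON(NEMeanStdDevNormalizationKernel::validate(src.info()));
norm_kernel.configure(&src); // output = nullptr, epsilon = 1e-8f by default
NEScheduler::get().schedule(&norm_kernel, Window::DimY);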
diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h
deleted file mode 100644
index 3e86860f79..0000000000
--- a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H
-#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Kernel to perform a median filter on a tensor */
-class NEMedian3x3Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEMedian3x3Kernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */
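Usage followed the usual simple-kernel pattern; a sketch assuming two already-allocated U8 tensors of the same shape (border filling, e.g. via NEFillBorderKernel, is elided):

    NEMedian3x3Kernel median;
    median.configure(&src, &dst, /* border_undefined = */ true);
    NEScheduler::get().schedule(&median, Window::DimY);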
diff --git a/arm_compute/core/NEON/kernels/NEMemsetKernel.h b/arm_compute/core/NEON/kernels/NEMemsetKernel.h
deleted file mode 100644
index b4bcd11b82..0000000000
--- a/arm_compute/core/NEON/kernels/NEMemsetKernel.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H
-#define ARM_COMPUTE_NEMEMSETKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for filling the planes of a tensor */
-class NEMemsetKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMemsetKernel";
- }
- /** Default constructor */
- NEMemsetKernel();
- /** Default destructor */
- ~NEMemsetKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMemsetKernel(const NEMemsetKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMemsetKernel &operator=(const NEMemsetKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEMemsetKernel(NEMemsetKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEMemsetKernel &operator=(NEMemsetKernel &&) = default;
- /** Initialise the kernel's tensor and filling value
- *
- * @param[in,out] tensor Input tensor to fill. Supported data types: All
- * @param[in] constant_value The value used to fill the planes of the tensor
- */
- void configure(ITensor *tensor, const PixelValue &constant_value);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- ITensor *_tensor;
- PixelValue _constant_value;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */
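The kernel broadcast a single PixelValue across the whole tensor; a sketch assuming an already-allocated F32 tensor t (and PixelValue's float constructor):

    NEMemsetKernel fill;
    fill.configure(&t, PixelValue(1.0f)); // set every element of t to 1.0f
    NEScheduler::get().schedule(&fill, Window::DimY);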
diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h
deleted file mode 100644
index 445e12af03..0000000000
--- a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform min max search on a 3D tensor. */
-class NEMinMaxLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMinMaxLayerKernel";
- }
- /** Default constructor */
- NEMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete;
- /** Default destructor */
- ~NEMinMaxLayerKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @note output[0] = minimum
- * @note output[1] = maximum
- *
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEMinMaxLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: F32.
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
- /** Resets global minimum and maximum. */
- void reset();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- void update_min_max(float *out_ptr, float min, float max);
- const ITensor *_input;
- ITensor *_output;
- arm_compute::Mutex _mtx;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */
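The [2, batches, ...] output layout documented above works out as in this sketch for a batch of two [16, 16, 3] inputs (allocation helpers as in the earlier sketch; reset() is assumed to seed the global min/max before each run):

    Tensor in, out;
    in.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U, 2U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::F32)); // {min, max} per batch
    in.allocator()->allocate();
    out.allocator()->allocate();

    NEMinMaxLayerKernel mm;
    mm.configure(&in, &out);
    mm.reset();
    NEScheduler::get().schedule(&mm, Window::DimY);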
diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h
deleted file mode 100644
index 597a093d70..0000000000
--- a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H
-#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the kernel to perform min max search on an image. */
-class NEMinMaxKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMinMaxKernel";
- }
- /** Default constructor */
- NEMinMaxKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxKernel(const NEMinMaxKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxKernel(NEMinMaxKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete;
- /** Default destructor */
- ~NEMinMaxKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const IImage *input, void *min, void *max);
- /** Resets global minimum and maximum. */
- void reset();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Performs the min/max algorithm on U8 images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- void minmax_U8(Window win);
- /** Performs the min/max algorithm on S16 images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- void minmax_S16(Window win);
- /** Performs the min/max algorithm on F32 images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- void minmax_F32(Window win);
- /** Common signature for all the specialised MinMax functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using MinMaxFunction = void (NEMinMaxKernel::*)(Window window);
- /** MinMax function to use for the particular image types passed to configure() */
- MinMaxFunction _func;
- /** Helper to update min/max values **/
- template <typename T>
- void update_min_max(T min, T max);
-
- const IImage *_input; /**< Input image. */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */
-};
-
-/** Interface for the kernel to find min max locations of an image. */
-class NEMinMaxLocationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMinMaxLocationKernel";
- }
- /** Default constructor */
- NEMinMaxLocationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default;
- /** Default destructor */
- ~NEMinMaxLocationKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_loc Array of minimum value locations.
- * @param[out] max_loc Array of maximum value locations.
- * @param[out] min_count Number of minimum value encounters.
- * @param[out] max_count Number of maximum value encounters.
- */
- void configure(const IImage *input, void *min, void *max,
- ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr,
- uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- /** Performs the min/max location algorithm on T type images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- template <class T, bool count_min, bool count_max, bool loc_min, bool loc_max>
- void minmax_loc(const Window &win);
- /** Common signature for all the specialised MinMaxLoc functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window);
- /** MinMaxLoc function to use for the particular image types passed to configure() */
- MinMaxLocFunction _func;
- /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */
- template <class T, typename>
- struct create_func_table;
-
- const IImage *_input; /**< Input image. */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- uint32_t *_min_count; /**< Count of minimum value encounters. */
- uint32_t *_max_count; /**< Count of maximum value encounters. */
- ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */
- ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */
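Locating extrema was a two-stage affair: NEMinMaxKernel reduces to the values, then NEMinMaxLocationKernel scans again for positions and counts. A sketch for a U8 image img (Coordinates2DArray from the runtime Array.h; the capacity of 64 is an arbitrary caller choice, and all four location/count outputs are optional):

    int32_t min_val = 0, max_val = 0; // S32 results for a U8 input
    NEMinMaxKernel minmax;
    minmax.configure(&img, &min_val, &max_val);
    minmax.reset();
    NEScheduler::get().schedule(&minmax, Window::DimY);

    Coordinates2DArray min_loc(64), max_loc(64);
    uint32_t min_count = 0, max_count = 0;
    NEMinMaxLocationKernel loc;
    loc.configure(&img, &min_val, &max_val, &min_loc, &max_loc, &min_count, &max_count);
    NEScheduler::get().schedule(&loc, Window::DimY);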
diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h
deleted file mode 100644
index 43594bacbf..0000000000
--- a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H
-#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to apply a non-linear filter */
-class NENonLinearFilterKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NENonLinearFilterKernel";
- }
- /** Default constructor */
- NENonLinearFilterKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete;
- /** Allow instances of this class to be moved */
- NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default;
- /** Allow instances of this class to be moved */
- NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default;
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Used only if @p pattern is set to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Fill mask with the corresponding given pattern.
- *
- * @param[in,out] mask Mask to be filled according to pattern
- * @param[in] cols Columns (width) of mask
- * @param[in] rows Rows (height) of mask
- * @param[in] pattern Pattern to fill the mask according to
- */
- void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern);
- /** Apply a median filter when given mask pattern is defined as box.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void median_filter_box(const Window &win);
- /** Apply a min filter when given mask pattern is defined as box.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void min_filter_box(const Window &win);
- /** Apply a max filter when given mask pattern is defined as box.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void max_filter_box(const Window &win);
- /** Apply a median filter when given mask pattern is defined as cross.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void median_filter_cross(const Window &win);
- /** Apply a min filter when given mask pattern is defined as cross.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void min_filter_cross(const Window &win);
- /** Apply a max filter when given mask pattern is defined as cross.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void max_filter_cross(const Window &win);
- /** Apply a median filter when given mask pattern is defined as disk.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void median_filter_disk(const Window &win);
- /** Apply a min filter when given mask pattern is defined as disk.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void min_filter_disk(const Window &win);
- /** Apply a max filter when given mask pattern is defined as disk.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void max_filter_disk(const Window &win);
- /** Apply a non-linear filter when given mask has user-defined pattern.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void non_linear_filter_generic(const Window &win);
-
-private:
- unsigned int _border_width;
- const ITensor *_input;
- ITensor *_output;
- const uint8_t *_mask;
- MatrixPattern _pattern;
- NonLinearFilterFunction _function;
- unsigned int _func_idx;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */
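A sketch of a 5x5 cross median filter on already-allocated U8 tensors; the mask argument is only read for the user-defined pattern, so nullptr is fine for the built-in ones:

    NENonLinearFilterKernel filter;
    filter.configure(&src, &dst,
                     NonLinearFilterFunction::MEDIAN,
                     5U,                   // mask_size: 3 or 5
                     MatrixPattern::CROSS,
                     nullptr,              // user mask, unused here
                     true);                // border_undefined
    NEScheduler::get().schedule(&filter, Window::DimY);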
diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
deleted file mode 100644
index e2ddec9a33..0000000000
--- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
-#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON
- *
- * @note Used by @ref NEFastCorners and @ref NEHarrisCorners
- */
-class NENonMaximaSuppression3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NENonMaximaSuppression3x3Kernel";
- }
- /** Default constructor */
- NENonMaximaSuppression3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default;
- /** Default destructor */
- ~NENonMaximaSuppression3x3Kernel() = default;
-
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-protected:
- /** Common signature for all the specialised non-maxima suppression 3x3 functions
- *
- * @param[in] input_ptr Pointer to the input tensor.
- * @param[out] output_ptr Pointer to the output tensor
- * @param[in] input_stride Stride of the input tensor
- */
- using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride);
-
- NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32
- */
-class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
-{
-public:
- const char *name() const override
- {
- return "NENonMaximaSuppression3x3FP16Kernel";
- }
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-};
-#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */
-using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */
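A sketch suppressing non-maxima in an already-allocated F32 score map; on builds with __ARM_FEATURE_FP16_VECTOR_ARITHMETIC the FP16 variant can be substituted unchanged, since it differs only in its intermediate precision:

    NENonMaximaSuppression3x3Kernel nms;
    nms.configure(&scores, &suppressed, /* border_undefined = */ true);
    NEScheduler::get().schedule(&nms, Window::DimY);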
diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
deleted file mode 100644
index 4727164d00..0000000000
--- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the normalization layer kernel.
- */
-class NENormalizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NENormalizationLayerKernel";
- }
- /** Default constructor */
- NENormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~NENormalizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input_squared Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * Data type and layout supported: same as @p input.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- */
- void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input_squared Source tensor info in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * Data type and layout supported: same as @p input.
- * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Function to perform normalization depending on the given template
- * dimension. The second template parameter specifies whether the
- * normalization has to be 1D or 2D.
- *
- * @note Only supported normalizations are:
- * - 1D over X or Z
- * - 2D over X and Y
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm>
- void normalize_float(const Window &window);
-
- /** Common signature for all the specialised normalization functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window);
-
-private:
- NormalizationFunction _func;
- const ITensor *_input;
- const ITensor *_input_squared;
- ITensor *_output;
- NormalizationLayerInfo _norm_info;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */
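The kernel expects the caller to provide the element-wise square of the input alongside it (the runtime NENormalizationLayer function produced it with a pixel-wise multiplication). A sketch of 1D cross-map normalisation over 5 neighbours, with the three tensors assumed allocated and shape-compatible:

    NormalizationLayerInfo info(NormType::CROSS_MAP, 5);
    NENormalizationLayerKernel norm;
    norm.configure(&input, &input_squared, &output, info);
    NEScheduler::get().schedule(&norm, Window::DimY);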
diff --git a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h
deleted file mode 100644
index 4cbefbd1e3..0000000000
--- a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H
-#define ARM_COMPUTE_NEPADLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to add padding to a tensor
- *
- * Add padding given padding information
- */
-class NEPadLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPadLayerKernel";
- }
- /** Default constructor */
- NEPadLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPadLayerKernel(const NEPadLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPadLayerKernel(NEPadLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default;
- /** Default destructor */
- ~NEPadLayerKernel() = default;
-
- /** Initialize the function
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT.
- * Only CONSTANT padding mode is currently supported
- */
- void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer.
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Output tensor info. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT.
- * Only CONSTANT padding mode is currently supported
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the padding function with constant padding
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_pad_constant(const Window &window);
-
- /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window);
-
- /** Common signature for all the specialised permute functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window);
-
- PadFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- PaddingList _padding;
- PixelValue _constant_value;
- PaddingMode _mode;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */
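Padding is given per dimension as {front, back} pairs; a sketch growing an allocated [4, 4] F32 tensor src by one element on every side of both spatial dimensions, into a pre-allocated [6, 6] destination:

    PaddingList padding = { { 1, 1 }, { 1, 1 } };

    NEPadLayerKernel pad;
    pad.configure(&src, &dst, padding, PixelValue(0.0f), PaddingMode::CONSTANT);
    NEScheduler::get().schedule(&pad, Window::DimY);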
diff --git a/arm_compute/core/NEON/kernels/NEPermuteKernel.h b/arm_compute/core/NEON/kernels/NEPermuteKernel.h
deleted file mode 100644
index 89dc4e6fc7..0000000000
--- a/arm_compute/core/NEON/kernels/NEPermuteKernel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H
-#define ARM_COMPUTE_NEPERMUTEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel to perform tensor permutation.
- *
- * Permutes given a permutation vector
- */
-class NEPermuteKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPermuteKernel";
- }
- /** Default constructor */
- NEPermuteKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPermuteKernel(const NEPermuteKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPermuteKernel &operator=(const NEPermuteKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPermuteKernel(NEPermuteKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPermuteKernel &operator=(NEPermuteKernel &&) = default;
- /** Default destructor */
- ~NEPermuteKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors of rank up to 4 are supported
- *
- * @param[in] input The input tensor to permute. Data types supported: All
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ITensor *input, ITensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPermuteKernel
- *
- * @note Arbitrary permutation vectors of rank up to 4 are supported
- *
- * @param[in] input The input tensor to permute. Data types supported: All
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the permute
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_permute(const Window &window);
-
- /** Common signature for all the specialised permute functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window);
-
- PermuteFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- PermutationVector _perm;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */
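In a permutation vector, destination dimension i takes source dimension perm[i]; a sketch turning an NCHW-shaped [W, H, C, N] tensor into NHWC [C, W, H, N] (tensors assumed allocated with matching shapes):

    PermutationVector perm(2U, 0U, 1U); // [W, H, C, N] -> [C, W, H, N], i.e. NCHW -> NHWC
    NEPermuteKernel permute;
    permute.configure(&src, &dst, perm);
    NEScheduler::get().schedule(&permute, Window::DimY);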
diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
deleted file mode 100644
index 1a9dd6be2e..0000000000
--- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H
-#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform pixel-wise multiplication between two tensors */
-class NEPixelWiseMultiplicationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPixelWiseMultiplicationKernel";
- }
- /** Default constructor */
- NEPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default;
- /** Default destructor */
- ~NEPixelWiseMultiplicationKernel() = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @note For @p scale equal to 1/255 only rounding to nearest even is supported (implemented as round half up).
- * For all other scale values only rounding to zero is supported (implemented as rounding towards minus infinity).
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
- * @param[out] output Output tensor. Data types supported:
- * - U8, only if both inputs are U8.
- * - QASYMM8, only if both inputs are QASYMM8.
- * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED.
- * - S16.
- * - QSYMM16, only if both inputs are QSYMM16.
- * - S32, only if both inputs are QSYMM16.
- * - F16, only if @p input1 is F16.
- * - F32, only if both inputs are F32.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
- * @param[in] rounding_policy Rounding policy.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel
- *
- * @note For @p scale equal to 1/255 only rounding to nearest even is supported (implemented as round half up).
- * For all other scale values only rounding to zero is supported (implemented as rounding towards minus infinity).
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
- * @param[in] output Output tensor info. Data types supported:
- * - U8, only if both inputs are U8.
- * - QASYMM8, only if both inputs are QASYMM8.
- * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED.
- * - S16.
- * - QSYMM16, only if both inputs are QSYMM16.
- * - S32, only if both inputs are QSYMM16.
- * - F16, only if @p input1 is F16.
- * - F32, only if both inputs are F32.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
- * @param[in] rounding_policy Rounding policy.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Common signature for all the specialised multiplication functions with integer scaling factor
- *
- * @param[in] input1_ptr Pointer to the first input tensor.
- * @param[in] input2_ptr Pointer to the second input tensor.
- * @param[out] output_ptr Pointer to the output tensor.
- * @param[in] scale Integer scale factor.
- */
- using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale);
- /** Common signature for all the specialised multiplication functions with float scaling factor
- *
- * @param[in] input1_ptr Pointer to the first input tensor.
- * @param[in] input2_ptr Pointer to the second input tensor.
- * @param[out] output_ptr Pointer to the output tensor.
- * @param[in] scale Float scale factor.
- */
- using MulFunctionFloat = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale);
- /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor
- *
- * @param[in] input1_ptr Pointer to the first input tensor.
- * @param[in] input2_ptr Pointer to the second input tensor.
- * @param[out] output_ptr Pointer to the output tensor.
- * @param[in] scale Float scale factor.
- * @param[in] input1_qua_info Quantization Info of tensor input1.
- * @param[in] input2_qua_info Quantization Info of tensor input2.
- * @param[in] output_qua_info Quantization Info of tensor output.
- *
- */
- using MulFunctionQuantized = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
- const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info);
-
- MulFunctionFloat *_func_float;
- MulFunctionInt *_func_int;
- MulFunctionQuantized *_func_quantized;
-
-private:
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
- float _scale;
- int _scale_exponent;
- bool _run_optimized_qasymm8;
-};
-
-/** Interface for the complex pixelwise multiplication kernel. */
-class NEComplexPixelWiseMultiplicationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEComplexPixelWiseMultiplicationKernel";
- }
- /** Default constructor.*/
- NEComplexPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEComplexPixelWiseMultiplicationKernel(const NEComplexPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEComplexPixelWiseMultiplicationKernel &operator=(const NEComplexPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEComplexPixelWiseMultiplicationKernel(NEComplexPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEComplexPixelWiseMultiplicationKernel &operator=(NEComplexPixelWiseMultiplicationKernel &&) = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel
- *
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
-};
-
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */
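The scale and rounding constraints in the notes above translate into calls like this sketch, multiplying two already-allocated U8 tensors into an S16 destination with the classic 1/255 scale (which takes the round-to-nearest policy; every other legal scale pairs with RoundingPolicy::TO_ZERO):

    NEPixelWiseMultiplicationKernel mul;
    mul.configure(&a, &b, &out,
                  1.0f / 255.0f,                  // must be 1/255 or 1/2^n with n in [0, 15]
                  ConvertPolicy::SATURATE,
                  RoundingPolicy::TO_NEAREST_UP);
    NEScheduler::get().schedule(&mul, Window::DimY);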
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
deleted file mode 100644
index b0574b7cf6..0000000000
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the pooling layer kernel */
-class NEPoolingLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPoolingLayerKernel";
- }
- /** Default constructor */
- NEPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~NEPoolingLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @note F16 are supported for pool sizes 2 and 3 only
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[out] indices (Optional) The indices of the maximal values. Data type supported: U32.
- */
- void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel
- *
- * @note F16 are supported for pool sizes 2 and 3 only
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[in] indices (Optional) The indices of the maximal values. Data type supported: U32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Function to perform 2x2 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- void pooling2_f32_nchw_maxpool_indices(const Window &window_input, const Window &window);
- /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window);
- /** Function to perform MxN pooling for 32-bit floating point values.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform MxN pooling for 32-bit floating point values (NHWC).
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 7x7 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 3x3 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 2x2 pooling for float16_t.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 3x3 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform MxN pooling for 16-bit floating point values.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform MxN pooling for 16-bit floating point values. (NHWC)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Template function to perform 2x2 pooling for 8-bit quantized fixed point. (NCHW)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Template function to perform 3x3 pooling for 8-bit quantized fixed point. (NCHW)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Template function to perform MxN pooling for 8-bit quantized. (NCHW)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Template function to perform MxN pooling for 8-bit quantized. (NHWC)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Common signature for all the specialised Pooling functions
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding);
-
-private:
- PoolingFunction _func;
- const ITensor *_input;
- ITensor *_output;
- ITensor *_indices;
- PoolingLayerInfo _pool_info;
- DataLayout _data_layout;
- unsigned int _num_elems_processed_per_iteration;
- BorderSize _border_size;
- bool _is_square;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */
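For readers tracking this removal, a minimal validate-only sketch of the deleted NEPoolingLayerKernel interface. The tensor shapes and the PoolingLayerInfo constructor arguments (type, size, layout, pad/stride) are assumptions based on the pre-removal 20.x API, not part of this patch:

    #include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        // Hypothetical 32x32x16 F32 input in NCHW; 2x2 max pooling with stride 2 -> 16x16x16.
        const TensorInfo input(TensorShape(32U, 32U, 16U), 1, DataType::F32);
        const TensorInfo output(TensorShape(16U, 16U, 16U), 1, DataType::F32);

        // PoolingLayerInfo arguments (type, size, layout, pad/stride) assumed from the 20.x API.
        const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0));

        const Status status = NEPoolingLayerKernel::validate(&input, &output, pool_info);
        std::cout << (status.error_code() == ErrorCode::OK ? "valid" : status.error_description()) << "\n";
        return 0;
    }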
diff --git a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h
deleted file mode 100644
index 6bf6574568..0000000000
--- a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H
-#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to calculate prior boxes */
-class NEPriorBoxLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPriorBoxLayerKernel";
- }
- /** Default constructor */
- NEPriorBoxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
- * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
- * @param[in] info Prior box layer info.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel
- *
- * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1
- * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
- * @param[in] info Prior box layer info.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Stores the coordinates of the calculated prior boxes.
- *
- * @param[out] out Output pointer.
- * @param[in] offset Output offset to write to.
- * @param[in] center_x Center pixel value on x-axis.
- * @param[in] center_y Center pixel value on y-axis.
- * @param[in] box_width Prior box width.
- * @param[in] box_height Prior box height.
- * @param[in] width Input width.
- * @param[in] height Input height.
- */
- void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height);
- /** Function to calculate prior boxes.
- *
- * @param[in] window Input region on which to execute the kernel.
- */
- void calculate_prior_boxes(const Window &window);
-
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
- PriorBoxLayerInfo _info;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */
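A hedged sketch of the deleted NEPriorBoxLayerKernel::validate(). The PriorBoxLayerInfo constructor arguments (min_sizes, variances, offset) and the one-prior-per-cell output shape are assumptions, not taken from this patch:

    #include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical 8x8 feature map and 300x300 image, both F32/NCHW.
        const TensorInfo feature(TensorShape(8U, 8U, 256U), 1, DataType::F32);
        const TensorInfo image(TensorShape(300U, 300U, 3U), 1, DataType::F32);

        // Assumed PriorBoxLayerInfo constructor: (min_sizes, variances, offset).
        const PriorBoxLayerInfo info({ 64.f }, { 0.1f, 0.1f, 0.2f, 0.2f }, 0.5f);

        // One prior per cell assumed, so the output is [8 * 8 * 1 * 4, 2].
        const TensorInfo output(TensorShape(256U, 2U), 1, DataType::F32);

        const Status status = NEPriorBoxLayerKernel::validate(&feature, &image, &output, info);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }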
diff --git a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h
deleted file mode 100644
index f5e8da7feb..0000000000
--- a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include <functional>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform layer normalization */
-class NEQLSTMLayerNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEQLSTMLayerNormalizationKernel";
- }
- /** Default constructor */
- NEQLSTMLayerNormalizationKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete;
- /** Default Move Constructor. */
- NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default;
- /** Default move assignment operator */
- NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEQLSTMLayerNormalizationKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QSYMM16.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] weight Weight tensor. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor. Data types supported: S32
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias);
- /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel
- *
- * @param[in] input Source tensor info. Data types supported: QSYMM16.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] weight Weight tensor info. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor info. Data types supported: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- // constants
- static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */
- static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */
- static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */
- static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in bytes */
-
- using ComputeFuncType = std::function<void(NEQLSTMLayerNormalizationKernel &)>;
-
- ComputeFuncType _fn{}; /**< Function pointer to computation function */
-
- const ITensor *_input{ nullptr }; /**< Input tensor */
- const ITensor *_weight{ nullptr }; /**< Weight tensor */
- const ITensor *_bias{ nullptr }; /**< Bias tensor */
- ITensor *_output{ nullptr }; /**< Output tensor */
-
- int32_t _output_multiplier{}; /**< Multiplier for output values */
- int32_t _output_shift{}; /**< Shift value for output values */
-
- int32_t _window_start_x{}; /**< The beginning of x-axis iteration */
- int32_t _window_end_x{}; /**< The end of x-axis iteration */
- int32_t _window_step_x{}; /**< The size of x-axis iteration's step */
-
- Window _inout_window{}; /**< Window for input and output tensor */
- Window _weight_window{}; /**< Window for weight and bias tensor */
-
- /** Function to configure initial windows for destination of computation
- *
- * @param[in] target Destination tensor to use for the output window
- *
- * @return configured window
- */
- Window configure_window(ITensor *target);
- // Function to compute for data type QSYMM16
- void compute_qsymm16();
- /** Function to compute summation and summation of squared input of the given input pointer
- *
- * @param[in] input_ptr Pointer to the input array
- *
- * @return Sum and sum of squares of the input values
- */
- std::pair<int64_t, int64_t> sum_qsymm16(const int16_t *input_ptr);
- /** Function to normalize values using computed mean and standard deviation
- *
- * @param[in] input_ptr Pointer to input array
- * @param[in] output_ptr Pointer to output array
- * @param[in] weight_ptr Pointer to weight array
- * @param[in] bias_ptr Pointer to bias array
- * @param[in] mean Mean value
- * @param[in] inv_std_mul Quantized multiplier for standard deviation
- * @param[in] inv_std_shift Shift for standard deviation
- *
- */
- void normalize_qasymm16(const int16_t *input_ptr,
- int16_t *output_ptr,
- const int16_t *weight_ptr,
- const int32_t *bias_ptr,
- int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift);
- /** Function to compute output quantization information */
- QuantizationInfo compute_output_qinfo();
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */
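A minimal sketch of the deleted NEQLSTMLayerNormalizationKernel::validate(), with QSYMM16 input/weight and S32 bias as documented above; the shapes and quantization scales are illustrative assumptions:

    #include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical batch of 2 vectors of 128 QSYMM16 values; scales are illustrative.
        const TensorInfo input(TensorShape(128U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f));
        const TensorInfo output(input);
        const TensorInfo weight(TensorShape(128U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 1024.f));
        const TensorInfo bias(TensorShape(128U), 1, DataType::S32);

        const Status status = NEQLSTMLayerNormalizationKernel::validate(&input, &output, &weight, &bias);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }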
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
deleted file mode 100644
index 087e767b73..0000000000
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the quantization layer kernel.
- *
- * @note The implementation supports only 3D input tensors
- *
- */
-class NEQuantizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEQuantizationLayerKernel";
- }
- /** Default constructor */
- NEQuantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEQuantizationLayerKernel() = default;
- /** Set the input, output.
- *
- * @param[in] input Source tensor. Dimensions above the third are interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
- /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename TIn, typename TOut>
- void run_quantize_qasymm8(const Window &window);
- /** Function to apply QASYMM16 quantization on a tensor.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T>
- void run_quantize_qasymm16(const Window &window);
-
- const ITensor *_input;
- ITensor *_output;
-
- QuantizationFunctionExecutorPtr _func;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */
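A minimal sketch of the deleted NEQuantizationLayerKernel::validate() for an F32-to-QASYMM8 case; the scale and offset values are illustrative, not taken from this patch:

    #include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical F32 -> QASYMM8 quantization of a 16x16x3 tensor.
        const TensorInfo input(TensorShape(16U, 16U, 3U), 1, DataType::F32);
        const TensorInfo output(TensorShape(16U, 16U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 128));

        const Status status = NEQuantizationLayerKernel::validate(&input, &output);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }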
diff --git a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h
deleted file mode 100644
index bebcab5359..0000000000
--- a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H
-#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the RoIAlign kernel.
- */
-class NEROIAlignLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEROIAlignLayerKernel";
- }
-
- /** Constructor */
- NEROIAlignLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default;
- /** Default move assignment operator. */
- NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default;
- /** Default destructor */
- ~NEROIAlignLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] rois ROIs tensor: a 2D tensor of size [5, N] (where N is the number of ROIs), each ROI holding the batch id of the image
- * followed by the top-left and bottom-right corner coordinates, i.e. [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and offset 0 if @p input is QASYMM8, otherwise same as @p input
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8,
- * otherwise same as @p input
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <DataLayout data_layout, typename input_data_type, typename roi_data_type = input_data_type>
- void internal_run(const Window &window, const ThreadInfo &info);
-
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_rois;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/
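A hedged sketch of the deleted NEROIAlignLayerKernel::validate(); the ROIPoolingLayerInfo constructor arguments (pooled width, pooled height, spatial scale) are assumed from the pre-removal Types.h, and the shapes are illustrative:

    #include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical 38x38x256 F32 feature map with 4 ROIs pooled to 7x7.
        const TensorInfo input(TensorShape(38U, 38U, 256U), 1, DataType::F32);
        const TensorInfo rois(TensorShape(5U, 4U), 1, DataType::F32); // [batch_id, x1, y1, x2, y2] per ROI
        TensorInfo output(TensorShape(7U, 7U, 256U, 4U), 1, DataType::F32);

        // Assumed ROIPoolingLayerInfo constructor: (pooled_width, pooled_height, spatial_scale).
        const ROIPoolingLayerInfo pool_info(7U, 7U, 1.f / 16.f);

        const Status status = NEROIAlignLayerKernel::validate(&input, &rois, &output, pool_info);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }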
diff --git a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h
deleted file mode 100644
index 59a5017711..0000000000
--- a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include "arm_compute/core/IArray.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the ROI pooling layer kernel */
-class NEROIPoolingLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEROIPoolingLayerKernel";
- }
- /** Default constructor */
- NEROIPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~NEROIPoolingLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[in] rois ROIs tensor: a 2D tensor of size [5, N] (where N is the number of ROIs), each ROI holding the batch id of the image
- * followed by the top-left and bottom-right corner coordinates, i.e. [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor.
- */
- void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- const ITensor *_rois;
- ITensor *_output;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */
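NEROIPoolingLayerKernel exposes no validate(), so this sketch wires configure() with runtime tensors and schedules the kernel directly. The shapes, the ROIPoolingLayerInfo arguments, and the split dimension are assumptions; a real program would also fill the input and ROI tensors before scheduling:

    #include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical F32 feature map and 2 ROIs stored as U16 [batch_id, x1, y1, x2, y2].
        Tensor input, rois, output;
        input.allocator()->init(TensorInfo(TensorShape(16U, 16U, 64U), 1, DataType::F32));
        rois.allocator()->init(TensorInfo(TensorShape(5U, 2U), 1, DataType::U16));
        output.allocator()->init(TensorInfo(TensorShape(7U, 7U, 64U, 2U), 1, DataType::F32));
        input.allocator()->allocate();
        rois.allocator()->allocate();
        output.allocator()->allocate();

        NEROIPoolingLayerKernel kernel;
        kernel.configure(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 1.f / 16.f));

        // Fill input and rois here before scheduling in a real program.
        NEScheduler::get().schedule(&kernel, Window::DimY);
        return 0;
    }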
diff --git a/arm_compute/core/NEON/kernels/NERangeKernel.h b/arm_compute/core/NEON/kernels/NERangeKernel.h
deleted file mode 100644
index e67a5dc945..0000000000
--- a/arm_compute/core/NEON/kernels/NERangeKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NERANGEKERNEL_H
-#define ARM_COMPUTE_NERANGEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Kernel class for Range
- *
- * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments
- * of 'step' up to but not including 'end'.
- */
-class NERangeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NERangeKernel";
- }
- /** Default constructor */
- NERangeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERangeKernel(const NERangeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERangeKernel &operator=(const NERangeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NERangeKernel(NERangeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NERangeKernel &operator=(NERangeKernel &&) = default;
- /** Default destructor */
- ~NERangeKernel() = default;
- /** Initialize the kernel's output tensor, start, end and step of the sequence.
- *
- * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending value of the sequence (exclusive).
- * @param[in] step The gap between each pair of values in the sequence.
- */
- void configure(ITensor *output, float start, float end, float step);
- /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel
- *
- * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending value of the sequence (exclusive).
- * @param[in] step The gap between each pair of values in the sequence.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *output, float start, float end, float step);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using RangeFunction = void(ITensor *output, float start, float step, const Window &window);
-
- RangeFunction *_func; /**< Range function to be called */
- float _start; /**< Start of sequence */
- float _end; /**< End of sequence */
- float _step; /**< Increment/step value */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NERANGEKERNEL_H */
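A small sketch of the deleted NERangeKernel::validate(). The sequence covers [start, end) in increments of step, so the output length is ceil((end - start) / step); the shape below is illustrative:

    #include "arm_compute/core/NEON/kernels/NERangeKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // [0, 10) in steps of 2 -> 5 elements: 0, 2, 4, 6, 8.
        const TensorInfo output(TensorShape(5U), 1, DataType::F32);
        const Status status = NERangeKernel::validate(&output, 0.f, 10.f, 2.f);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }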
diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
deleted file mode 100644
index 28cca4987b..0000000000
--- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H
-#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a reduction operation
- *
- * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
- * output tensor is signed 32-bit integer (S32). It is the user's responsibility
- * to check that the results do not overflow because the indices are computed
- * in unsigned 32-bit (U32).
- */
-class NEReductionOperationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEReductionOperationKernel";
- }
- /** Default constructor */
- NEReductionOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReductionOperationKernel(const NEReductionOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEReductionOperationKernel(NEReductionOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default;
- /** Default destructor */
- ~NEReductionOperationKernel() = default;
-
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
- * @param[in] op Reduction operation to perform.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW.
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
- * @param[in] op Reduction operation to perform.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _reduction_axis;
- ReductionOperation _op;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */
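A minimal sketch of the deleted NEReductionOperationKernel::validate(), summing along axis 0. Per the documentation above, the output keeps the input rank, so the reduced axis collapses to size 1; the shapes are illustrative:

    #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Sum along axis 0 of an 8x4 F32 tensor; the reduced axis collapses to size 1.
        const TensorInfo input(TensorShape(8U, 4U), 1, DataType::F32);
        const TensorInfo output(TensorShape(1U, 4U), 1, DataType::F32);

        const Status status = NEReductionOperationKernel::validate(&input, &output, 0U, ReductionOperation::SUM);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }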
diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h
deleted file mode 100644
index e929b1c5d4..0000000000
--- a/arm_compute/core/NEON/kernels/NERemapKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREMAPKERNEL_H
-#define ARM_COMPUTE_NEREMAPKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a remap on a tensor */
-class NERemapKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NERemapKernel";
- }
- /** Default constructor */
- NERemapKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERemapKernel(const NERemapKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERemapKernel &operator=(const NERemapKernel &) = delete;
- /** Allow instances of this class to be moved */
- NERemapKernel(NERemapKernel &&) = default;
- /** Allow instances of this class to be moved */
- NERemapKernel &operator=(NERemapKernel &&) = default;
- /** Default destructor */
- ~NERemapKernel() = default;
-
- /** Initialize the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] map_x Map for X coordinates. Data type supported: F32.
- * @param[in] map_y Map for Y coordinates. Data type supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- */
- void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** function to perform nearest interpolation on the given window */
- void remap_nearest(const Window &window);
- /** function to perform bilinear interpolation on the given window */
- void remap_bilinear(const Window &window);
- /** Remap function to use for the particular interpolation type passed to configure() */
- void (NERemapKernel::*_func)(const Window &window);
-
- const ITensor *_input; /**< Input image */
- ITensor *_output; /**< Output image */
- const ITensor *_map_x; /**< Input remap x coordinates */
- const ITensor *_map_y; /**< Input remap y coordinates */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */
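NERemapKernel also has no validate(), so this is a configure()-level sketch with runtime tensors. The shapes are assumptions, and a real program would fill the image and both coordinate maps, and handle the input border (the function-level NERemap does this), before scheduling:

    #include "arm_compute/core/NEON/kernels/NERemapKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical 64x64 U8 image remapped through F32 coordinate maps.
        Tensor input, map_x, map_y, output;
        input.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        map_x.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
        map_y.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        for(Tensor *t : { &input, &map_x, &map_y, &output })
        {
            t->allocator()->allocate();
        }

        NERemapKernel kernel;
        kernel.configure(&input, &map_x, &map_y, &output, InterpolationPolicy::NEAREST_NEIGHBOR);

        // Fill input and both maps, and fill the border region, before scheduling in a real program.
        NEScheduler::get().schedule(&kernel, Window::DimY);
        return 0;
    }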
diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h
deleted file mode 100644
index 9277ddbe47..0000000000
--- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H
-#define ARM_COMPUTE_NEREORGLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor re-organization */
-class NEReorgLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEReorgLayerKernel";
- }
- /** Default constructor */
- NEReorgLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReorgLayerKernel(const NEReorgLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEReorgLayerKernel(NEReorgLayerKernel &&) = default;
- /** Default move assignment operator */
- NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default;
- /** Default destructor */
- ~NEReorgLayerKernel() = default;
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] stride Stride to be used during data re-organization.
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction
- */
- void configure(const ITensor *input, ITensor *output, int32_t stride);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayerKernel
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] stride Stride to be used during data re-organization
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- int32_t _stride;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */
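A minimal sketch of the deleted NEReorgLayerKernel::validate(). Reorg with stride s divides the spatial dimensions by s and multiplies the channel count by s * s; the shapes below are illustrative:

    #include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Reorg with stride 2: 8x8x4 -> 4x4x16.
        const TensorInfo input(TensorShape(8U, 8U, 4U), 1, DataType::F32);
        const TensorInfo output(TensorShape(4U, 4U, 16U), 1, DataType::F32);

        const Status status = NEReorgLayerKernel::validate(&input, &output, 2);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }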
diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
deleted file mode 100644
index fccf2685a8..0000000000
--- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H
-#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor reshaping */
-class NEReshapeLayerKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEReshapeLayerKernel";
- }
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */
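A minimal sketch of the deleted NEReshapeLayerKernel::validate(); the only hard requirement is that the input and output element counts match, and the shapes below are illustrative:

    #include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // 8x4 -> 32: same 32 elements, different shape.
        const TensorInfo input(TensorShape(8U, 4U), 1, DataType::F32);
        const TensorInfo output(TensorShape(32U), 1, DataType::F32);

        const Status status = NEReshapeLayerKernel::validate(&input, &output);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }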
diff --git a/arm_compute/core/NEON/kernels/NEReverseKernel.h b/arm_compute/core/NEON/kernels/NEReverseKernel.h
deleted file mode 100644
index 516653b70d..0000000000
--- a/arm_compute/core/NEON/kernels/NEReverseKernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H
-#define ARM_COMPUTE_NEREVERSEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the reverse layer kernel. */
-class NEReverseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEReverseKernel";
- }
- /** Default constructor */
- NEReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReverseKernel(const NEReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReverseKernel &operator=(const NEReverseKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEReverseKernel(NEReverseKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEReverseKernel &operator=(NEReverseKernel &&) = default;
- /** Default destructor */
- ~NEReverseKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_axis;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */
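A minimal sketch of the deleted NEReverseKernel::validate(); as documented above, the axis argument is a 1-D U32 tensor listing the dimension indices to reverse, and the shapes are illustrative:

    #include "arm_compute/core/NEON/kernels/NEReverseKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Reverse an 8x4 tensor along axis 0; the axis list is a 1-D U32 tensor.
        const TensorInfo input(TensorShape(8U, 4U), 1, DataType::F32);
        const TensorInfo output(TensorShape(8U, 4U), 1, DataType::F32);
        const TensorInfo axis(TensorShape(1U), 1, DataType::U32);

        const Status status = NEReverseKernel::validate(&input, &output, &axis);
        return status.error_code() == ErrorCode::OK ? 0 : 1;
    }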
diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h
deleted file mode 100644
index 9bc04129e0..0000000000
--- a/arm_compute/core/NEON/kernels/NEScaleKernel.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESCALEKERNEL_H
-#define ARM_COMPUTE_NESCALEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform scaling on a tensor */
-class NEScaleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEScaleKernel";
- }
- /** Default constructor */
- NEScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScaleKernel(const NEScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScaleKernel &operator=(const NEScaleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEScaleKernel(NEScaleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEScaleKernel &operator=(NEScaleKernel &&) = default;
- /** Default destructor */
- ~NEScaleKernel() = default;
-
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor
- * @note Using the InterpolationPolicy::AREA policy only supports data layout NCHW and input data type U8.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
- * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo to use for configuration
- */
- void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output,
- const ScaleKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel
- *
- * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor
- * @note Using the InterpolationPolicy::AREA policy only supports data layout NCHW and input data type U8.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
- * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
- * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo to use for validation
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output,
- const ScaleKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** function to perform scale using nearest interpolation on the given window */
- void scale_nearest_nchw(const Window &window);
- /** function to perform scale using bilinear interpolation on the given window */
- void scale_bilinear_nchw(const Window &window);
- /** function to perform scale using area interpolation on the given window
- *
- * @note Used only in case of down-sampling.
- */
- void scale_area_nchw(const Window &window);
- /** function to perform scale on the given window */
- void scale_nhwc(const Window &window);
- /** Scale function to use for the particular interpolation type passed to configure() */
- void (NEScaleKernel::*_func)(const Window &window);
-
- const ITensor *_offsets;
- const ITensor *_dx;
- const ITensor *_dy;
- const ITensor *_input;
- ITensor *_output;
- InterpolationPolicy _policy;
- BorderSize _border_size;
- BorderMode _border_mode;
- PixelValue _constant_border_value;
- float _sampling_offset;
- bool _use_padding;
- bool _align_corners;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESCALEKERNEL_H */
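(A sketch of how the removed kernel could be validated before configuration; in practice the dx/dy/offsets maps were precomputed by the NEScale runtime function, which is assumed rather than shown. The 2x bilinear upscale below is illustrative, and the ScaleKernelInfo constructor arguments follow the descriptor this header pulls in from KernelDescriptors.h.)

    #include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
    #include "arm_compute/core/TensorInfo.h"

    using namespace arm_compute;

    Status check_2x_bilinear_upscale()
    {
        // dx, dy and offsets must match the output plane, as the @note above states
        TensorInfo input(TensorShape(16U, 16U), 1, DataType::F32);
        TensorInfo output(TensorShape(32U, 32U), 1, DataType::F32);
        TensorInfo dx(TensorShape(32U, 32U), 1, DataType::F32);
        TensorInfo dy(TensorShape(32U, 32U), 1, DataType::F32);
        TensorInfo offsets(TensorShape(32U, 32U), 1, DataType::S32);

        const ScaleKernelInfo info(InterpolationPolicy::BILINEAR, BorderMode::REPLICATE);
        return NEScaleKernel::validate(&input, &dx, &dy, &offsets, &output, info);
    }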
diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h
deleted file mode 100644
index 320b44d307..0000000000
--- a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H
-#define ARM_COMPUTE_NESCHARR3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
- *
- * @f[
- * \mathbf{G}_x=\begin{vmatrix}
- * -3 & 0 & +3\\
- * -10 & 0 & +10\\
- * -3 & 0 & +3
- * \end{vmatrix}
- * @f]
- */
-class NEScharr3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEScharr3x3Kernel";
- }
- /** Default constructor */
- NEScharr3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default;
- /** Default destructor */
- ~NEScharr3x3Kernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- bool _run_scharr_x; /**< Do we need to run Scharr X ? */
- bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< Output tensor for scharr X */
- ITensor *_output_y; /**< Output tensor for scharr Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */
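(The header documents only the horizontal coefficients; for reference, the standard companion vertical Scharr kernel, which the Y-gradient path applies, is the transpose:)

    \mathbf{G}_y=\begin{vmatrix}
    -3 & -10 & -3\\
     0 &   0 &  0\\
    +3 & +10 & +3
    \end{vmatrix}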
diff --git a/arm_compute/core/NEON/kernels/NESelectKernel.h b/arm_compute/core/NEON/kernels/NESelectKernel.h
deleted file mode 100644
index 51c8543ddc..0000000000
--- a/arm_compute/core/NEON/kernels/NESelectKernel.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESELECTKERNEL_H
-#define ARM_COMPUTE_NESELECTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the select kernel
- *
- * Select is computed by:
- * @f[ output(i) = condition(i) ? x(i) : y(i) @f]
- *
- */
-class NESelectKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESelectKernel";
- }
- /** Default constructor */
- NESelectKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESelectKernel(const NESelectKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESelectKernel &operator=(const NESelectKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESelectKernel(NESelectKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESelectKernel &operator=(NESelectKernel &&) = default;
- /** Default destructor */
- ~NESelectKernel() = default;
-
- /** Initialise the kernel's inputs and output
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[out] output Output tensor. Data types supported: Same as @p x
- */
- void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output);
-
- /** Validate the argument passed to the kernel
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised select functions
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- */
- using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window);
-
- /** Select function to use for the particular tensor types passed to configure() */
- SelectFunction *_function;
- const ITensor *_c; /**< Condition tensor */
- const ITensor *_x; /**< Source tensor 1 */
- const ITensor *_y; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
- bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESELECTKERNEL_H */
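(A minimal sketch of the documented semantics output(i) = condition(i) ? x(i) : y(i), with the usual runtime plumbing assumed; shapes are illustrative.)

    #include "arm_compute/core/NEON/kernels/NESelectKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void select_example()
    {
        Tensor c{}, x{}, y{}, output{};
        const TensorShape shape(16U);
        c.allocator()->init(TensorInfo(shape, 1, DataType::U8));  // condition mask
        x.allocator()->init(TensorInfo(shape, 1, DataType::F32)); // chosen where c != 0
        y.allocator()->init(TensorInfo(shape, 1, DataType::F32)); // chosen where c == 0
        output.allocator()->init(TensorInfo(shape, 1, DataType::F32));

        NESelectKernel kernel{};
        kernel.configure(&c, &x, &y, &output);
        // allocate and fill the tensors, then:
        // NEScheduler::get().schedule(&kernel, Window::DimY);
    }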
diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h
deleted file mode 100644
index ef0db2a428..0000000000
--- a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H
-#define ARM_COMPUTE_NESOBEL3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor.
- *
- * @f[
- * \mathbf{G}_x=\begin{vmatrix}
- * -1 & 0 & +1\\
- * -2 & 0 & +2\\
- * -1 & 0 & +1
- * \end{vmatrix}
- * @f]
- */
-class NESobel3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel3x3Kernel";
- }
- /** Default constructor */
- NESobel3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel3x3Kernel(const NESobel3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel3x3Kernel(NESobel3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default;
- /** Default destructor */
- ~NESobel3x3Kernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< Output tensor for sobel X */
- ITensor *_output_y; /**< Output tensor for sobel Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */
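(For illustration, a sketch requesting both gradients from the single-pass 3x3 kernel; as the docs note, either output may instead be nullptr. Runtime plumbing is assumed.)

    #include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void sobel3x3_example()
    {
        Tensor src{}, gx{}, gy{};
        const TensorShape shape(64U, 64U);
        src.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        gx.allocator()->init(TensorInfo(shape, 1, DataType::S16));
        gy.allocator()->init(TensorInfo(shape, 1, DataType::S16));

        NESobel3x3Kernel sobel{};
        sobel.configure(&src, &gx, &gy, /* border_undefined = */ true);
    }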
diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h
deleted file mode 100644
index bc0cfb016e..0000000000
--- a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H
-#define ARM_COMPUTE_NESOBEL5x5KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run the horizontal pass of the 5x5 Sobel filter on a tensor.
- *
- */
-class NESobel5x5HorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel5x5HorKernel";
- }
- /** Default constructor */
- NESobel5x5HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default;
- /** Default destructor */
- ~NESobel5x5HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< X output of horizontal pass */
- ITensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of the 5x5 Sobel filter on a tensor.
- *
- */
-class NESobel5x5VertKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel5x5VertKernel";
- }
- /** Default constructor */
- NESobel5x5VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default;
- /** Default destructor */
- ~NESobel5x5VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16.
- * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16.
- * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- ITensor *_input_x; /**< X input (X output of the hor pass) */
- ITensor *_input_y; /**< Y input (Y output of the hor pass) */
- ITensor *_output_x; /**< X output of sobel */
- ITensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */
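(The 5x5 filter is separable, which is why the header splits it into a horizontal and a vertical kernel; a sketch of how the two passes chain, with S16 intermediates as documented and runtime plumbing assumed:)

    #include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <initializer_list>

    using namespace arm_compute;

    void sobel5x5_example()
    {
        Tensor src{}, tmp_x{}, tmp_y{}, gx{}, gy{};
        const TensorShape shape(64U, 64U);
        src.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        for(Tensor *t : { &tmp_x, &tmp_y, &gx, &gy })
        {
            t->allocator()->init(TensorInfo(shape, 1, DataType::S16));
        }

        NESobel5x5HorKernel  hor{};
        NESobel5x5VertKernel vert{};
        hor.configure(&src, &tmp_x, &tmp_y, false);      // horizontal pass
        vert.configure(&tmp_x, &tmp_y, &gx, &gy, false); // vertical pass consumes the hor outputs
    }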
diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h
deleted file mode 100644
index 468a94d0d1..0000000000
--- a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H
-#define ARM_COMPUTE_NESOBEL7x7KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run the horizontal pass of the 7x7 Sobel filter on a tensor.
- *
- */
-class NESobel7x7HorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel7x7HorKernel";
- }
- /** Default constructor */
- NESobel7x7HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default;
- /** Default destructor */
- ~NESobel7x7HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< X output of horizontal pass */
- ITensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of the 7x7 Sobel filter on a tensor.
- *
- */
-class NESobel7x7VertKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel7x7VertKernel";
- }
- /** Default constructor */
- NESobel7x7VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default;
- /** Default destructor */
- ~NESobel7x7VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set
- * @note If output_x is set then input_x must be set too
- * @note If output_y is set then input_y must be set too
- *
- * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input_x; /**< X input (X output of the hor pass) */
- const ITensor *_input_y; /**< Y input (Y output of the hor pass) */
- ITensor *_output_x; /**< X output of sobel */
- ITensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
deleted file mode 100644
index 0e0be7936b..0000000000
--- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H
-#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for identifying the max value of 1D logits */
-class NELogits1DMaxKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NELogits1DMaxKernel";
- }
- /** Default constructor */
- NELogits1DMaxKernel();
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window);
-
-private:
- Logits1DMaxFunction *_func;
- BorderSize _border_size;
-};
-
-/** Interface for softmax computation with a pre-computed max. */
-template <bool IS_LOG = false>
-class NELogits1DSoftmaxKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- if(IS_LOG)
- {
- return "NELogits1DLogSoftmaxKernel";
- }
- else
- {
- return "NELogits1DSoftmaxKernel";
- }
- }
- /** Default constructor */
- NELogits1DSoftmaxKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete;
- /** Allow instances of this class to be moved */
- NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default;
- /** Allow instances of this class to be moved */
- NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default;
- /** Default destructor */
- ~NELogits1DSoftmaxKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1.
- * Data types supported: same as @p input.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] beta A scaling factor for the exponent.
- *
- * @param tmp Auxiliary tensor. Must be type F32 and same shape as the input.
- */
- void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp);
- /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1.
- * Data types supported: same as @p input.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] beta A scaling factor for the exponent.
- * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *max,
- const ITensorInfo *output, const float beta, const ITensorInfo *tmp);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta,
- const Window &window);
-
- LogitsSoftmaxFunction *_func;
- const ITensor *_input;
- const ITensor *_max;
- ITensor *_output;
- float _beta;
- ITensor *_tmp; //Temporary. Used internally
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */
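(For reference, the computation the two kernels implement together is the standard numerically-stabilised softmax, with m the per-row maximum produced by NELogits1DMaxKernel and beta the exponent scale; when IS_LOG is set the log variant is produced:)

    y_i = \frac{e^{\beta (x_i - m)}}{\sum_j e^{\beta (x_j - m)}}
    \qquad
    \log y_i = \beta (x_i - m) - \log \sum_j e^{\beta (x_j - m)}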
diff --git a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h
deleted file mode 100644
index 532fbb2852..0000000000
--- a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H
-#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declaration
-class ITensor;
-
-/** Interface for the space to batch kernel */
-class NESpaceToBatchLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESpaceToBatchLayerKernel";
- }
- /** Default constructor */
- NESpaceToBatchLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default;
- /** Default destructor */
- ~NESpaceToBatchLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output);
- /** Initialise the kernel's input and output. (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- const ITensor *_block_shape; /**< Block shape tensor */
- const ITensor *_paddings; /**< Paddings tensor */
- ITensor *_output; /**< Destination tensor */
- DataLayout _data_layout; /**< Data layout to be used at run-time */
-
- Size2D _padding_left;
- int _block_shape_x;
- int _block_shape_y;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */
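(A worked shape example for the static overload: space-to-batch moves each block_shape_x x block_shape_y spatial tile into the batch dimension, so, writing total_pad_* for the combined left/right Size2D paddings,

    batch_out  = batch_in * block_shape_x * block_shape_y
    height_out = (height_in + total_pad_y) / block_shape_y
    width_out  = (width_in  + total_pad_x) / block_shape_x

e.g. a 2x2 block with no padding maps an NHWC input of shape [N=1, H=4, W=4, C=3] to [4, 2, 2, 3]; the padded spatial extents must divide exactly by the block shape.)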
diff --git a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h
deleted file mode 100644
index e0c22e65fb..0000000000
--- a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H
-#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the space to depth kernel */
-class NESpaceToDepthLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESpaceToDepthLayerKernel";
- }
- /** Default constructor */
- NESpaceToDepthLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default;
- /** Default destructor */
- ~NESpaceToDepthLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
- DataLayout _data_layout; /**< Data layout of the operation */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */
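(Similarly, a worked example for space-to-depth: each block_shape x block_shape spatial tile is folded into the channel dimension, so

    height_out  = height_in / block_shape
    width_out   = width_in / block_shape
    channel_out = channel_in * block_shape * block_shape

e.g. block_shape = 2 maps an NHWC input of shape [N=1, H=4, W=4, C=3] to [1, 2, 2, 12].)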
diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h
deleted file mode 100644
index c4dc53eac6..0000000000
--- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H
-#define ARM_COMPUTE_NESTACKLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension. */
-class NEStackLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEStackLayerKernel";
- }
- /** Default constructor */
- NEStackLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStackLayerKernel(const NEStackLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEStackLayerKernel(NEStackLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default;
- /** Default destructor */
- ~NEStackLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output);
-
- // Inherited methods overridden
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _axis;
- unsigned int _idx_input;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */
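(Each kernel instance writes a single input slice into the stacked output, which is why configure() takes idx_input and num_tensors; a sketch of how a caller might drive it, assuming a vector of already-initialised inputs:)

    #include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <memory>
    #include <vector>

    using namespace arm_compute;

    void stack_example(std::vector<Tensor *> &inputs, Tensor &output, unsigned int axis)
    {
        const auto num_tensors = static_cast<unsigned int>(inputs.size());
        std::vector<std::unique_ptr<NEStackLayerKernel>> kernels;
        for(unsigned int i = 0; i < num_tensors; ++i)
        {
            std::unique_ptr<NEStackLayerKernel> k(new NEStackLayerKernel());
            k->configure(inputs[i], axis, i, num_tensors, &output);
            kernels.emplace_back(std::move(k));
        }
        // each kernel is then scheduled independently to fill its slice of the output
    }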
diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h
deleted file mode 100644
index 6709619a62..0000000000
--- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H
-#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor strided slicing */
-class NEStridedSliceKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEStridedSliceKernel";
- }
- /** Default constructor */
- NEStridedSliceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSliceKernel(const NEStridedSliceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEStridedSliceKernel(NEStridedSliceKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default;
- /** Default destructor */
- ~NEStridedSliceKernel() = default;
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- */
- void configure(const ITensor *input, ITensor *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- Coordinates _starts_abs; /**< Absolute start coordinates */
- Coordinates _final_strides; /**< Final strides */
- int32_t _shrink_mask; /**< Shrink axis mask */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */
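(The mask arguments mirror TensorFlow-style strided-slice semantics; as an illustration, the sketch below keeps the full extent of dimension 0 via the mask bits while slicing elements 1..2 of dimension 1. Coordinates/BiStrides are ordered dimension-0-first, and runtime plumbing is assumed.)

    #include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void strided_slice_example(Tensor &input, Tensor &output)
    {
        const Coordinates starts(0, 1);
        const Coordinates ends(0, 3);
        const BiStrides   strides(1, 1);

        NEStridedSliceKernel kernel{};
        // Bit 0 of begin_mask/end_mask is set, so starts[0]/ends[0] are ignored and the
        // fullest possible range of dimension 0 is used; shrink_axis_mask is left clear.
        kernel.configure(&input, &output, starts, ends, strides,
                         1 /* begin_mask */, 1 /* end_mask */, 0 /* shrink_axis_mask */);
    }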
diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h
deleted file mode 100644
index 13a76cb40e..0000000000
--- a/arm_compute/core/NEON/kernels/NETableLookupKernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H
-#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ILut;
-
-/** Interface for the kernel to perform table lookup calculations. */
-class NETableLookupKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NETableLookupKernel";
- }
- /** Default constructor */
- NETableLookupKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETableLookupKernel(const NETableLookupKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETableLookupKernel &operator=(const NETableLookupKernel &) = delete;
- /** Allow instances of this class to be moved */
- NETableLookupKernel(NETableLookupKernel &&) = default;
- /** Allow instances of this class to be moved */
- NETableLookupKernel &operator=(NETableLookupKernel &&) = default;
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] input An input tensor. Data types supported: U8/S16.
- * @param[in] lut The input LUT.
- * @param[out] output The output tensor. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ILut *lut, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Perform table lookup on a given window.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <class T>
- void tableLookup(const Window &window);
- /** Common signature for all the specialised lut functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window);
- /** Sub function to use for the particular tensor types passed to configure() */
- TableLookupFunction _func;
- const ILut *_lut;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */
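For reference, a minimal sketch of how the interface above was driven directly before its
removal from the public API. It assumes the pre-removal include path, the runtime Tensor
and Lut classes, and that NEScheduler accepts a split dimension as scheduling hint;
illustrative only, not the library's documented usage:

    #include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" // pre-removal path
    #include "arm_compute/runtime/Lut.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void table_lookup_sketch()
    {
        using namespace arm_compute;
        Tensor src, dst;
        Lut    lut(256, DataType::U8); // one entry per possible U8 input value
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src and the LUT entries ...

        NETableLookupKernel lookup;
        lookup.configure(&src, &lut, &dst); // dst[i] = lut[src[i]]
        NEScheduler::get().schedule(&lookup, Window::DimY);
    }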
diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h
deleted file mode 100644
index a6d1e9071c..0000000000
--- a/arm_compute/core/NEON/kernels/NEThresholdKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H
-#define ARM_COMPUTE_NETHRESHOLDKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the thresholding kernel
- *
- */
-class NEThresholdKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEThresholdKernel";
- }
- /** Constructor
- * Initialize all the pointers to nullptr and parameters to zero.
- */
- NEThresholdKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEThresholdKernel(const NEThresholdKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEThresholdKernel &operator=(const NEThresholdKernel &) = delete;
- /** Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] input An input tensor. Data type supported: U8
- * @param[out] output The output tensor. Data type supported: U8.
- * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold.
- * @param[in] false_value Value to set when the condition is not respected.
- * @param[in] true_value Value to set when the condition is respected.
- * @param[in] type Thresholding type. Either RANGE or BINARY.
- * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE.
- */
- void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** run binary thresholding on the given window */
- void run_binary(const Window &window);
- /** run range thresholding on the given window */
- void run_range(const Window &window);
-
- void (NEThresholdKernel::*_func)(const Window &window);
-
- const ITensor *_input; /**< Input */
- ITensor *_output; /**< Output */
- uint8_t _threshold;
- uint8_t _false_value;
- uint8_t _true_value;
- uint8_t _upper;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */
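The BINARY/RANGE split above maps directly onto configure(): upper is only read for RANGE.
A sketch of a binary threshold, under the same assumptions as the previous example:

    #include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" // pre-removal path
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void threshold_sketch()
    {
        using namespace arm_compute;
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        NEThresholdKernel threshold;
        // BINARY: dst = (src > 100) ? 255 : 0; upper is ignored for this type.
        threshold.configure(&src, &dst, 100, 0, 255, ThresholdType::BINARY, 0);
        NEScheduler::get().schedule(&threshold, Window::DimY);
    }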
diff --git a/arm_compute/core/NEON/kernels/NETileKernel.h b/arm_compute/core/NEON/kernels/NETileKernel.h
deleted file mode 100644
index a64470ffd0..0000000000
--- a/arm_compute/core/NEON/kernels/NETileKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETILEKERNEL_H
-#define ARM_COMPUTE_NETILEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a tile operation */
-class NETileKernel : public INEKernel
-{
-public:
- /** Default constructor */
- NETileKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NETileKernel(const NETileKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NETileKernel &operator=(const NETileKernel &) = delete;
- /** Allow instances of this class to be moved */
- NETileKernel(NETileKernel &&) = default;
- /** Allow instances of this class to be moved */
- NETileKernel &operator=(NETileKernel &&) = default;
- const char *name() const override
- {
- return "NETileKernel";
- }
- /** Set the source and destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- */
- void configure(const ITensor *input, ITensor *output, const Multiples &multiples);
- /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel
- *
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETILEKERNEL_H */
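Multiples is a per-dimension replication count, so the output shape is the input shape
multiplied element-wise. A sketch tiling a 4x3 tensor twice along x (assumptions as above;
validate() guards the configuration):

    #include "arm_compute/core/NEON/kernels/NETileKernel.h" // pre-removal path
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void tile_sketch()
    {
        using namespace arm_compute;
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 3U), 1, DataType::F32)); // {2, 1} -> 8x3
        src.allocator()->allocate();
        dst.allocator()->allocate();

        const Multiples multiples = { 2, 1 };
        ARM_COMPUTE_ERROR_THROW_ON(NETileKernel::validate(src.info(), dst.info(), multiples));

        NETileKernel tile;
        tile.configure(&src, &dst, multiples);
        NEScheduler::get().schedule(&tile, Window::DimY);
    }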
diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h
deleted file mode 100644
index a14dece0d6..0000000000
--- a/arm_compute/core/NEON/kernels/NETransposeKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H
-#define ARM_COMPUTE_NETRANSPOSEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel which transposes the elements of a matrix.
- *
- * [width, height, batch] -> [height, width, batch]
- *
- */
-class NETransposeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NETransposeKernel";
- }
- /** Default constructor */
- NETransposeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETransposeKernel(const NETransposeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETransposeKernel &operator=(const NETransposeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NETransposeKernel(NETransposeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NETransposeKernel &operator=(NETransposeKernel &&) = default;
- /** Default destructor */
- ~NETransposeKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] output Output tensor. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the transpose functions
- *
- * @param[in] input An input tensor. Data types supported: All
- * @param[out] output The output tensor. Data type supported: same as @p input
- * @param[in] window Region on which to execute the kernel.
- */
- using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window);
- /** Transpose function to use for the particular tensor types passed to configure() */
- TransposeFunction *_func;
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */
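The [width, height, batch] -> [height, width, batch] contract above means the destination
is created with the x and y dimensions swapped. A sketch under the same assumptions:

    #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" // pre-removal path
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void transpose_sketch()
    {
        using namespace arm_compute;
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 8U), 1, DataType::F32)); // [w,h] swapped
        src.allocator()->allocate();
        dst.allocator()->allocate();

        NETransposeKernel transpose;
        transpose.configure(&src, &dst);
        NEScheduler::get().schedule(&transpose, Window::DimY);
    }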
diff --git a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h b/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h
deleted file mode 100644
index 1ea3f974e7..0000000000
--- a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H
-#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the Upsample layer kernel.*/
-class NEUpsampleLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEUpsampleLayerKernel";
- }
- /** Default constructor */
- NEUpsampleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default;
- /** Default move assignment operator */
- NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default;
- /** Default destructor */
- ~NEUpsampleLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
- *
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to run upsample layer (NCHW)
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, int S>
- void upsample_nchw(const Window &window);
- /** Function to run upsample layer (NHWC)
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, int S>
- void upsample_nhwc(const Window &window);
-
- using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window);
-
-private:
- UpsampleFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- Size2D _info;
- unsigned int _num_elems_processed_per_iteration_x;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */
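With a stride of Size2D(2, 2) the spatial dimensions double, so an 8x8x3 input yields a
16x16x3 output. A sketch with nearest-neighbour filling (the 2x2 stride and policy chosen
here are illustrative assumptions, not the full set of supported configurations):

    #include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" // pre-removal path
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void upsample_sketch()
    {
        using namespace arm_compute;
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        NEUpsampleLayerKernel upsample;
        upsample.configure(&src, &dst, Size2D(2, 2), InterpolationPolicy::NEAREST_NEIGHBOR);
        NEScheduler::get().schedule(&upsample, Window::DimY);
    }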
diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h
deleted file mode 100644
index 61ca21eb48..0000000000
--- a/arm_compute/core/NEON/kernels/NEWarpKernel.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWARPKERNEL_H
-#define ARM_COMPUTE_NEWARPKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <array>
-#include <cstdint>
-namespace arm_compute
-{
-class ITensor;
-
-/** Common interface for warp affine and warp perspective */
-class INEWarpKernel : public INEKernel
-{
-public:
- /** Default constructor */
- INEWarpKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEWarpKernel(const INEWarpKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEWarpKernel &operator=(const INEWarpKernel &) = delete;
- /** Allow instances of this class to be moved */
- INEWarpKernel(INEWarpKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEWarpKernel &operator=(INEWarpKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] matrix The perspective or affine matrix to use, of type float. Must be 2x3 for affine and 3x3 for perspective.
- * The matrix argument requires 9 values; for the affine case the last 3 values are ignored.
- * @param[in] border_mode Strategy to use for borders
- * @param[in] constant_border_value Constant value used for filling the border.
- */
- virtual void configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-protected:
- /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED
- *
- * @param[in] window Region on which to execute the kernel
- */
- virtual void warp_undefined(const Window &window) = 0;
- /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT
- *
- * @param[in] window Region on which to execute the kernel
- */
- virtual void warp_constant(const Window &window) = 0;
- /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE
- *
- * @param[in] window Region on which to execute the kernel
- */
- virtual void warp_replicate(const Window &window) = 0;
- /** Common signature for all the specialised warp functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- void (INEWarpKernel::*_func)(const Window &window);
-
- const ITensor *_input; /**< Input Tensor */
- ITensor *_output; /**< Output Tensor */
- uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */
- std::array<float, 9> _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. */
-};
-
-/** Template interface for the kernel to compute warp affine
- *
- */
-template <InterpolationPolicy interpolation>
-class NEWarpAffineKernel : public INEWarpKernel
-{
-private:
- const char *name() const override
- {
- return "NEWarpAffineKernel";
- }
- // Inherited methods overridden:
- void warp_undefined(const Window &window) override;
- void warp_constant(const Window &window) override;
- void warp_replicate(const Window &window) override;
-};
-
-/** Template interface for the kernel to compute warp perspective
- *
- */
-template <InterpolationPolicy interpolation>
-class NEWarpPerspectiveKernel : public INEWarpKernel
-{
-private:
- const char *name() const override
- {
- return "NEWarpPerspectiveKernel";
- }
- // Inherited methods overridden:
- void warp_undefined(const Window &window) override;
- void warp_constant(const Window &window) override;
- void warp_replicate(const Window &window) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEWARPKERNEL_H */
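The 9-value matrix convention above is easiest to see with the identity transform: the
first six values hold the 2x3 affine matrix and the trailing three are ignored. A sketch
using the nearest-neighbour affine specialisation (assumptions as before):

    #include "arm_compute/core/NEON/kernels/NEWarpKernel.h" // pre-removal path
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <array>

    void warp_affine_sketch()
    {
        using namespace arm_compute;
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // Identity affine transform; the last three values are ignored for affine.
        const std::array<float, 9> matrix = { 1.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f };

        NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR> warp;
        warp.configure(&src, &dst, matrix, BorderMode::CONSTANT, 0 /* border value */);
        NEScheduler::get().schedule(&warp, Window::DimY);
    }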
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
deleted file mode 100644
index b68cb50c7b..0000000000
--- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H
-#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer
- *
- * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
- * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication.
- *
- * For example, assuming a 3D weight kernel of 3x3 dimensions and depth of 2, we have:
- * @f[
- * \left( \begin{array}{ccc}
- * a000 & a001 & a002 \\
- * a010 & a011 & a012 \\
- * a020 & a021 & a022 \\
- * \end{array} \right)
- * \left( \begin{array}{ccc}
- * a100 & a101 & a102 \\
- * a110 & a111 & a112 \\
- * a120 & a121 & a122 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{cccccccccccccccccc}
- * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
- * \end{array} \right)
- * @f]
- */
-class NEWeightsReshapeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEWeightsReshapeKernel";
- }
- /** Constructor.*/
- NEWeightsReshapeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default;
- /** Default destructor */
- ~NEWeightsReshapeKernel() = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32
- * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
- * @warning Appending biases to the reshaped weights matrix is not supported for quantized asymmetric types.
- * @param[out] output The output tensor. Data types supported: Same as @p input
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
- * @warning Appending biases to the reshaped weights matrix is not supported for quantized asymmetric types.
- * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */
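Following the formula above, a [kernel_x = 3, kernel_y = 3, IFM = 2, OFM = 4] weights
tensor linearizes each 3x3x2 kernel into 3 * 3 * 2 = 18 values. A sketch without bias
appending; note that the exact 2D layout of the reshaped output ([OFM, 18] here) is an
assumption of this example:

    #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" // pre-removal path
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void weights_reshape_sketch()
    {
        using namespace arm_compute;
        Tensor weights, reshaped;
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32));
        reshaped.allocator()->init(TensorInfo(TensorShape(4U, 18U), 1, DataType::F32)); // assumed layout
        weights.allocator()->allocate();
        reshaped.allocator()->allocate();

        NEWeightsReshapeKernel reshape;
        reshape.configure(&weights, nullptr /* no biases appended */, &reshaped);
        NEScheduler::get().schedule(&reshape, Window::DimY);
    }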
diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h
deleted file mode 100644
index f22f18f09f..0000000000
--- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the width concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEWidthConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEWidthConcatenateLayerKernel";
- }
- /** Default constructor */
- NEWidthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEWidthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ITensor *input, unsigned int width_offset, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] width_offset The offset on the X axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _width_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */
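Concatenation along width is expressed as one kernel per input, each writing its block at
a different width_offset into the shared output. A sketch joining a 4-column and a
6-column tensor (assumptions as before):

    #include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" // pre-removal path
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void width_concat_sketch()
    {
        using namespace arm_compute;
        Tensor in0, in1, out;
        in0.allocator()->init(TensorInfo(TensorShape(4U, 8U), 1, DataType::F32));
        in1.allocator()->init(TensorInfo(TensorShape(6U, 8U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(10U, 8U), 1, DataType::F32));
        in0.allocator()->allocate();
        in1.allocator()->allocate();
        out.allocator()->allocate();

        NEWidthConcatenateLayerKernel k0, k1;
        k0.configure(&in0, 0U, &out); // writes columns [0, 4)
        k1.configure(&in1, 4U, &out); // writes columns [4, 10)
        NEScheduler::get().schedule(&k0, Window::DimY);
        NEScheduler::get().schedule(&k1, Window::DimY);
    }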
diff --git a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
deleted file mode 100644
index 1740df0312..0000000000
--- a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
+++ /dev/null
@@ -1,596 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
-#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
-#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the NEON kernel to perform Winograd input transform. */
-class INEWinogradLayerTransformInputKernel : public INEKernel
-{
-public:
- /** Get the working space required to perform the transformation.
- *
- * Note that the working space is only required while the transformation
- * is being performed, and can therefore be reused whenever the
- * transformation is not running.
- *
- * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
- * @return Size of working space required in bytes.
- */
- virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
-
- /** Determine how much memory (in units of TIn) to allocate for the
- * transformed input.
- *
- * @param[in] num_batches Number of batches in the input tensor.
- * @param[in] num_channels Number of feature maps in the input tensor.
- * @param[in] num_rows Number of rows in each feature map.
- * @param[in] num_cols Number of columns in each feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- *
- * @return Storage size (in units of TIn) required.
- */
- virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
-
- /** Gets the stride between matrices in the input workspace
- *
- * @param[in] num_batches Number of batches in the input tensor.
- * @param[in] num_channels Number of feature maps in the input tensor.
- * @param[in] num_rows Number of rows in each feature map.
- * @param[in] num_cols Number of columns in each feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- *
- * @return Stride expressed in bytes.
- */
- virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
-
- /** Configure the input transform kernel.
- *
- * @param[in] input_nhwc Input tensor in NHWC data layout format.
- * @param[in] num_batches Number of batches in input tensor.
- * @param[in] num_rows Number of rows in input tensor.
- * @param[in] num_cols Number of columns in input tensor.
- * @param[in] num_channels Number of channels in input tensor.
- * @param[in] padding Padding type.
- * @param[out] output Base of output matrices.
- * @param[in] matrix_stride Stride between output matrices.
- * @param[in] workspace Tensor to be used as the working space during the computation.
- */
- virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
- const PaddingType padding, ITensor *output, const int matrix_stride, ITensor *workspace) = 0;
-
- /** Destructor */
- virtual ~INEWinogradLayerTransformInputKernel()
- {
- }
-};
-
-/** NEON kernel to perform Winograd input transform. */
-template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel
-{
-public:
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
- /** Default destructor */
- ~NEWinogradLayerTransformInputKernel() = default;
-
- /** Determine how much memory (in units of TIn) to allocate for the
- * transformed input.
- *
- * @param[in] num_batches Number of batches in the input tensor.
- * @param[in] num_channels Number of feature maps in the input tensor.
- * @param[in] num_rows Number of rows in each feature map.
- * @param[in] num_cols Number of columns in each feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- *
- * @return Storage size (in units of TIn) required.
- */
- unsigned int get_input_storage_size(
- int num_batches,
- int num_channels,
- int num_rows,
- int num_cols,
- bool same_padding) const override;
-
- /** Get the working space required to perform the transformation.
- *
- * Note that the working space is only required while the transformation
- * is being performed, and can therefore be reused whenever the
- * transformation is not running.
- *
- * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
- *
- * @return Size of working space required in bytes.
- */
- unsigned int get_working_space_size(unsigned int num_threads) const override;
-
- /** Gets the stride between matrices in the input workspace
- *
- * @param[in] num_batches Number of batches in the input tensor.
- * @param[in] num_channels Number of feature maps in the input tensor.
- * @param[in] num_rows Number of rows in each feature map.
- * @param[in] num_cols Number of columns in each feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- *
- * @return Stride expressed in bytes.
- */
- int get_matrix_stride(
- int num_batches,
- int num_channels,
- int num_rows,
- int num_cols,
- bool same_padding) const override;
-
- /** Default constructor */
- NEWinogradLayerTransformInputKernel();
-
- const char *name() const override
- {
- return "NEWinogradLayerTransformInputKernel";
- }
-
- /** Configure the input transform kernel.
- *
- * @param[in] input_nhwc Input tensor. Data types supported: F16/F32. Layout supported: NHWC.
- * @param[in] num_batches Number of batches in input tensor.
- * @param[in] num_rows Number of rows in input tensor.
- * @param[in] num_cols Number of columns in input tensor.
- * @param[in] num_channels Number of channels in input tensor.
- * @param[in] padding Padding type.
- * @param[out] output Base of output matrices.
- * @param[in] matrix_stride Stride between output matrices.
- * @param[in] workspace Tensor to be used as the working space during the computation.
- */
- void configure(
- const ITensor *input_nhwc,
- const int num_batches,
- const int num_rows,
- const int num_cols,
- const int num_channels,
- const PaddingType padding,
- ITensor *output,
- const int matrix_stride,
- ITensor *workspace) override;
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Winograd base kernel */
- using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
- /** Winograd convolution kernel */
- using WinogradConv = typename WinogradBase::template Convolution<T, T>;
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
- *
- * @param[in] input Input tensor info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
-
-private:
- using InputTransform = typename WinogradBase::template InputTransform<T, T>;
-
- std::unique_ptr<InputTransform> _transform{ nullptr };
- const ITensor *_input_nhwc;
- int _num_batches; /**< Number of batches in input tensor. */
- int _num_rows; /**< Number of rows in input tensor. */
- int _num_cols; /**< Number of columns in input tensor. */
- int _num_channels; /**< Number of channels in input tensor. */
- PaddingType _padding; /**< Padding type. */
- ITensor *_output; /**< Base of output matrices. */
- int _matrix_stride; /**< Stride between output matrices. */
- int _padding_top; /**< Padding to apply to the top of the image. */
- int _padding_left; /**< Padding to apply to the left of the image. */
- int _padding_right; /**< Padding to apply to the right of the image. */
- int _padding_bottom; /**< Padding to apply to the bottom of the image. */
- ITensor *_workspace;
-};
-
-/** Interface for the NEON kernel to perform Winograd output transform. */
-class INEWinogradLayerTransformOutputKernel : public INEKernel
-{
-public:
- /** Get the working space required to perform the transformation.
- *
- * Note that the working space is only required while the transformation
- * is being performed, and can therefore be reused whenever the
- * transformation is not running.
- *
- * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
- *
- * @return Size of working space required in bytes.
- */
- virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
-
- /** Determine how much memory (in units of TOut) to allocate for the
- * (Winograd domain) output.
- *
- * @param[in] num_batches Number of batches in the output tensor.
- * @param[in] num_rows Number of rows in each feature map of the input tensor.
- * @param[in] num_cols Number of columns in each feature map of the input tensor.
- * @param[in] num_output_channels Number of feature maps in the output tensor.
- *
- * @return Storage size (in units of TOut) required.
- */
- virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
-
- /** Gets the stride between matrices in the output workspace
- *
- * @param[in] num_batches Number of batches in the output tensor.
- * @param[in] num_rows Number of rows in each feature map of the input tensor.
- * @param[in] num_cols Number of columns in each feature map of the input tensor.
- * @param[in] num_output_channels Number of feature maps in the output tensor.
- *
- * @return Stride expressed in bytes.
- */
- virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
-
- /** Get the output shape of a convolution.
- *
- * @param[in] num_rows Number of rows in each feature map of the input tensor.
- * @param[in] num_cols Number of columns in each feature map of the input tensor.
- * @param[in] padding_same True if padding is SAME, false otherwise
- *
- * @return Shape of the output tensor
- */
- virtual std::pair<unsigned int, unsigned int> get_output_shape(
- int num_rows, /* Number of rows in each feature map of the input tensor. */
- int num_cols, /* Number of columns in each feature map of the input tensor. */
- bool padding_same /* True if padding is SAME, false otherwise */
- ) const = 0;
-
- /** Configure the output transform kernel.
- *
- * @param[in] biases Pointer to the biases tensor.
- * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
- * @param[in] matrix_stride Output matrix stride, which can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
- * @param[out] output_nhwc Pointer to the output tensor in NHWC data layout, in the spatial domain.
- * @param[in] num_batches Number of batches in the input tensor.
- * @param[in] num_rows Number of rows in output tensor.
- * @param[in] num_cols Number of columns in output tensor.
- * @param[in] num_channels Number of feature maps in the output tensor.
- * @param[in] workspace Tensor to be used as the working space during the computation.
- * @param[in] activation Activation to be used
- */
- virtual void configure(
- const ITensor *biases,
- const ITensor *transformed_output,
- const int matrix_stride,
- ITensor *output_nhwc,
- const int num_batches,
- const int num_rows,
- const int num_cols,
- const int num_channels,
- ITensor *workspace,
- const arm_gemm::Activation &activation) = 0;
-
- virtual ~INEWinogradLayerTransformOutputKernel()
- {
- }
-};
-
-/** NEON kernel to perform Winograd output transform. */
-template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel
-{
-public:
- const char *name() const override
- {
- return "NEWinogradLayerTransformOutputKernel";
- }
- /** Constructor */
- NEWinogradLayerTransformOutputKernel();
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
- /** Default destructor */
- ~NEWinogradLayerTransformOutputKernel() = default;
-
- // Inherited methods overridden:
- /** Determine how much memory (in units of TOut) to allocate for the
- * (Winograd domain) output.
- *
- * @param[in] num_batches Number of batches in the output tensor.
- * @param[in] num_rows Number of rows in each feature map of the input tensor.
- * @param[in] num_cols Number of columns in each feature map of the input tensor.
- * @param[in] num_output_channels Number of feature maps in the output tensor.
- *
- * @return Storage size (in units of TOut) required.
- */
- unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
-
- /** Gets the stride between matrices in the output workspace
- *
- * @param[in] num_batches Number of batches in the output tensor.
- * @param[in] num_rows Number of rows in each feature map of the input tensor.
- * @param[in] num_cols Number of columns in each feature map of the input tensor.
- * @param[in] num_output_channels Number of feature maps in the output tensor.
- *
- * @return Stride expressed in bytes.
- */
- int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
- /** Get the output shape of a convolution.
- *
- * @param[in] num_rows Number of rows in each feature map of the input tensor.
- * @param[in] num_cols Number of columns in each feature map of the input tensor.
- * @param[in] padding_same True if padding is SAME, false otherwise
- *
- * @return Shape of the output tensor
- */
- std::pair<unsigned int, unsigned int> get_output_shape(
- int num_rows, /* Number of rows in each feature map of the input tensor. */
- int num_cols, /* Number of columns in each feature map of the input tensor. */
- bool padding_same) const override;
-
- /** Get the working space required to perform the transformation.
- *
- * Note that the working space is only required while the transformation
- * is being performed, and can therefore be reused whenever the
- * transformation is not running.
- *
- * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
- *
- * @return Size of working space required in bytes.
- */
- unsigned int get_working_space_size(unsigned int num_threads) const override;
-
- /** Configure the output transform kernel.
- *
- * @param[in] biases Pointer to the biases tensor.
- * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
- * @param[in] matrix_stride Output matrix stride, which can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
- * @param[out] output_nhwc Pointer to a tensor with NHWC data layout, in the spatial domain.
- * @param[in] num_batches Number of batches in the input tensor.
- * @param[in] num_rows Number of rows in output tensor.
- * @param[in] num_cols Number of columns in output tensor.
- * @param[in] num_channels Number of feature maps in the output tensor.
- * @param[in] workspace Tensor to be used as the working space during the computation.
- * @param[in] activation Activation to be used
- */
- void configure(
- const ITensor *biases,
- const ITensor *transformed_output,
- const int matrix_stride,
- ITensor *output_nhwc,
- const int num_batches,
- const int num_rows,
- const int num_cols,
- const int num_channels,
- ITensor *workspace,
- const arm_gemm::Activation &activation) override;
-
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
- *
- * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32.
- * @param[in] bias Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
- * @param[in] output Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);
-
-private:
- using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
- using WinogradConv = typename WinogradBase::template Convolution<T, T>;
- using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;
-
- std::unique_ptr<OutputTransform> _transform{ nullptr };
- const ITensor *_biases;
- const ITensor *_transformed_output;
- ITensor *_workspace;
- int _matrix_stride;
- int _matrix_row_stride;
- ITensor *_output_nhwc;
- int _num_batches;
- int _num_rows;
- int _num_cols;
- int _num_channels;
-};
-
-/** Interface for the NEON kernel to perform Winograd weights transform. */
-class INEWinogradLayerTransformWeightsKernel : public INEKernel
-{
-public:
- /** Allow instances of this class to be copied */
- INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default;
- /** Allow instances of this class to be copied */
- INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default;
- /** Allow instances of this class to be moved */
- INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default;
-
- INEWinogradLayerTransformWeightsKernel()
- {
- }
- virtual ~INEWinogradLayerTransformWeightsKernel()
- {
- }
- /** Determine how much memory (in units of T) to allocate for the
- * transformed weights.
- *
- * @param[in] num_output_channels Number of output feature maps.
- * @param[in] num_input_channels Number of input feature maps.
- *
- * @return Storage size (in units of T) required.
- */
- virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
- /** Gets the stride between matrices in the kernel workspace
- *
- * @param[in] num_output_channels Number of output feature maps.
- * @param[in] num_input_channels Number of input feature maps.
- *
- * @return Stride expressed in bytes.
- */
- virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;
-
- /** Configure the weights transform kernel.
- *
- * @param[in] weights_hwio Pointer to the weights tensor
- * @param[out] output Pointer to working space for the output tensor in the Winograd domain.
- * @param[in] matrix_stride Stride across matrices in the output workspace.
- * @param[in] num_output_channels Number of filters.
- * @param[in] num_input_channels Number of channels in each filter.
- */
-
- virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
- *
- * @param[in] input Input tensor info. Data types supported: F16/F32.
- * @param[in] weights Weights tensor info. Data types supported: same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
-};
-
-/** NEON kernel to perform Winograd weights transform. */
-template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel
-{
-public:
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
- /** Default destructor */
- ~NEWinogradLayerTransformWeightsKernel() = default;
-
- /** Default constructor. */
- NEWinogradLayerTransformWeightsKernel();
- const char *name() const override
- {
- return "NEWinogradLayerTransformWeightsKernel";
- }
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
- *
- * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
- * kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
-
- // Inherited methods overridden:
-
-#ifndef DOXYGEN_SKIP_THIS
- /** Configure the weights transform kernel.
- *
- * @param[in] weights_hwio Pointer to the weights tensor
- * @param[out] output Pointer to working space for the output tensor in the Winograd domain.
- * @param[in] matrix_stride Stride across matrices in the output workspace.
- * @param[in] num_output_channels Number of filters.
- * @param[in] num_input_channels Number of channels in each filter.
- */
- void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
-#endif /* DOXYGEN_SKIP_THIS */
-
- /** Determine how much memory (in units of T) to allocate for the
- * transformed weights.
- *
- * @param[in] num_output_channels Number of output feature maps.
- * @param[in] num_input_channels Number of input feature maps.
- *
- * @return Storage size (in units of T) required.
- */
- unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;
-
- /** Gets the stride between matrices in the kernel workspace
- *
- * @param[in] num_output_channels Number of output feature maps.
- * @param[in] num_input_channels Number of input feature maps.
- *
- * @return Stride expressed in bytes.
- */
- int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
- using WinogradConv = typename WinogradBase::template Convolution<T, T>;
- using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;
-
- std::unique_ptr<WeightsTransform> _transform{ nullptr };
- const ITensor *_weights_hwio;
- ITensor *_output;
- int _matrix_stride;
- int _num_output_channels;
- int _num_input_channels;
-};
-
-/** Helper class grouping the NEON Winograd transform kernels for a given data type and tile/kernel geometry. */
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-class NEWinogradLayerConfiguration
-{
-public:
- /** Winograd base kernel */
- using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
- /** Winograd convolution kernel */
- using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
-
- using TransformInputKernel = NEWinogradLayerTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
- using TransformWeightsKernel = NEWinogradLayerTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
- using TransformOutputKernel = NEWinogradLayerTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
-};
-
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H*/
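For reference, a minimal sketch of how the configuration's type aliases select concrete transform kernels; the F(2x2, 3x3) float geometry below is an illustrative assumption, not the only supported one.

// Sketch only: pick the transform kernels for one Winograd geometry.
using WinogradF22K33 =
    arm_compute::NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;

// The aliases name the three concrete kernels for this geometry, e.g.:
WinogradF22K33::TransformWeightsKernel weights_transform;
// weights_transform.validate(...) / configure(...) then follow the
// documentation above.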
diff --git a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h b/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h
deleted file mode 100644
index 0fd3f8ce67..0000000000
--- a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H
-#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the YOLO layer kernel. */
-class NEYOLOLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEYOLOLayerKernel";
- }
- /** Constructor */
- NEYOLOLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete;
- /** Default move constructor */
- NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete;
- /** Default move assignment operator */
- NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default;
- /** Default destructor */
- ~NEYOLOLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer parameters.
- * @param[in] num_classes Number of classes to activate (must be a submultiple of the @p input channels)
- */
- void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in] num_classes Number of classes to activate (must be a submultiple of the @p input channels)
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to run YOLO layer
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T, int S>
- void yolo_layer_nchw(const Window &window);
- /** Function to run YOLO layer on tensors with NHWC format
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T>
- void yolo_layer_nhwc(const Window &window);
- /** Common signature for all the yolo layer functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window);
-
-private:
- YOLOFunctionPtr _func;
- ITensor *_input;
- ITensor *_output;
- ActivationLayerInfo _act_info;
- int32_t _num_classes;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */
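A hedged configuration sketch for the kernel above: the tensor, the LOGISTIC activation, and the class count of 80 are illustrative assumptions, and passing nullptr as the output requests the in-place path described in the documentation.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"

// Sketch: configure the kernel for in-place execution on an F32 tensor.
void configure_yolo_inplace(arm_compute::ITensor *input)
{
    const arm_compute::ActivationLayerInfo act(
        arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);

    // Validate first; nullptr output selects in-place execution on "input".
    ARM_COMPUTE_ERROR_THROW_ON(
        arm_compute::NEYOLOLayerKernel::validate(input->info(), nullptr, act, 80));

    arm_compute::NEYOLOLayerKernel yolo;
    yolo.configure(input, nullptr, act, 80);
}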
diff --git a/arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp b/arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp
deleted file mode 100644
index 4ff83fbc51..0000000000
--- a/arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-#include <array>
-#include <algorithm>
-#include <initializer_list>
-
-#include <cassert>
-
-namespace arm_gemm {
-
-template<unsigned int D>
-class NDRange {
-private:
- std::array<unsigned int, D> m_sizes {};
- std::array<unsigned int, D> m_totalsizes {};
-
- class NDRangeIterator {
- private:
- const NDRange &m_parent;
- unsigned int m_pos = 0;
- unsigned int m_end = 0;
-
- public:
- NDRangeIterator(const NDRange &p, unsigned int s, unsigned int e) : m_parent(p), m_pos(s), m_end(e) { }
-
- bool done() const {
- return (m_pos >= m_end);
- }
-
- unsigned int dim(unsigned int d) const {
- unsigned int r = m_pos;
-
- if (d < (D - 1)) {
- r %= m_parent.m_totalsizes[d];
- }
-
- if (d > 0) {
- r /= m_parent.m_totalsizes[d-1];
- }
-
- return r;
- }
-
- bool next_dim0() {
- m_pos++;
-
- return !done();
- }
-
- bool next_dim1() {
- m_pos += m_parent.m_sizes[0] - dim(0);
-
- return !done();
- }
-
- unsigned int dim0_max() const {
- unsigned int offset = std::min(m_end - m_pos, m_parent.m_sizes[0] - dim(0));
-
- return dim(0) + offset;
- }
- };
-
-public:
- NDRange& operator=(const NDRange& rhs)=default;
- NDRange(const NDRange& rhs) =default;
-
- template <typename... T>
- NDRange(T... ts)
- : m_sizes{ts...}
- {
- unsigned int t=1;
-
- for (unsigned int i=0; i<D; i++) {
- t *= m_sizes[i];
-
- m_totalsizes[i] = t;
- }
- }
-
- NDRange(const std::array<unsigned int, D>& n)
- : m_sizes(n)
- {
- unsigned int t=1;
-
- for (unsigned int i=0; i<D; i++) {
- t *= m_sizes[i];
-
- m_totalsizes[i] = t;
- }
- }
-
- NDRangeIterator iterator(unsigned int start, unsigned int end) const {
- return NDRangeIterator(*this, start, end);
- }
-
- unsigned int total_size() const {
- return m_totalsizes[D - 1];
- }
-
- unsigned int get_size(unsigned int v) const {
- return m_sizes[v];
- }
-};
-
-/** NDCoordinate builds upon a range, but specifies a starting position
- * in addition to a size which it inherits from NDRange
- */
-template<unsigned int N>
-class NDCoordinate : public NDRange<N> {
- using int_t =unsigned int;
- using ndrange_t = NDRange<N>;
-
- std::array<int_t, N> m_positions {};
-public:
- NDCoordinate& operator=(const NDCoordinate& rhs)=default;
- NDCoordinate(const NDCoordinate& rhs) =default;
- NDCoordinate(const std::initializer_list<std::pair<int_t, int_t>>& list)
- {
- std::array<int_t, N> sizes{};
-
- std::size_t i = 0;
- for(auto& p : list) {
- m_positions[i]= p.first;
- sizes[i++] = p.second;
- }
-
- //update the parents sizes
- static_cast<ndrange_t&>(*this) = ndrange_t(sizes);
- }
-
- int_t get_position(int_t d) const {
- assert(d < m_positions.size());
- return m_positions[d];
- }
-
- void set_position(int_t d, int_t v) {
- assert(d < size(m_positions));
- assert(v < ndrange_t::get_size(d));
-
- m_positions[d] = v;
- }
-
- int_t get_position_end(int_t d) const {
- return get_position(d) + NDRange<N>::get_size(d);
- }
-}; //class NDCoordinate
-
-/** @returns the number of dimensions in the NDRange which have non-1 values,
- * i.e. there is actual work in these dimensions that can be broken up
- */
-template<unsigned int N>
-std::size_t ndrange_popcount(const NDRange<N>& ndr) {
- std::size_t count = 0;
-
- for(unsigned int d = 0; d != N; ++d) {
- if(ndr.get_size(d) != 1)
- ++count;
- }
- return count;
-}
-
-} // namespace arm_gemm
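To illustrate the iterator above: a minimal sketch of recovering multi-dimensional coordinates from a flat position; the 4x2x2 shape is an arbitrary example.

#include "arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp"
#include <iostream>

int main()
{
    // A 4x2x2 range: total_size() is 16 and dim() recovers the coordinates.
    arm_gemm::NDRange<3> range(4u, 2u, 2u);

    auto it = range.iterator(0, range.total_size());
    while (!it.done())
    {
        std::cout << it.dim(0) << ' ' << it.dim(1) << ' ' << it.dim(2) << '\n';
        it.next_dim0(); // advance one position along the innermost dimension
    }
}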
diff --git a/arm_compute/core/NEON/kernels/assembly/Helpers.h b/arm_compute/core/NEON/kernels/assembly/Helpers.h
deleted file mode 100644
index 9372e05295..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/Helpers.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ASSEMBLY_HELPERS_H
-#define ARM_COMPUTE_ASSEMBLY_HELPERS_H
-
-#include "arm_compute/core/CPP/CPPTypes.h"
-#include "arm_compute/core/Utils.h"
-
-#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp"
-
-namespace arm_compute
-{
-/** Block sizes to use to break the M, N, K dimension */
-struct BlockSizes
-{
- unsigned int k_block{ 0 }; /**< Block size along the K dimension */
- unsigned int x_block{ 0 }; /**< Block size along the N (x) dimension */
- unsigned int m_round{ 0 }; /**< Block size along the M dimension (Must be a multiple of strategy_out_height) */
- unsigned int strategy_out_height{ 0 }; /**< Number of rows (M) processed by the selected strategy */
-};
-
-/** Extracts the description of the kernel selected by the GEMM backend heuristics
- *
- * @param[in] input_type Data type of the input tensor.
- * @param[in] ci CPU information.
- * @param[in] num_threads Maximum number of threads that might be used for the calculations.
- * @param[in] p M, N, K sizes.
- * @param[in] activation Activation struct
- * @param[in] pretranspose_hint Whether B is also pretransposed.
- *
- * @return Kernel description that the assembly heuristics picked for the given configuration
- */
-arm_gemm::KernelDescription get_gemm_info(DataType input_type,
- const CPUInfo &ci,
- const unsigned int num_threads,
- const INEGEMMWrapperKernel::Params &p,
- arm_gemm::Activation activation,
- bool pretranspose_hint);
-
-/** Calculate the recommended block sizes to use based on the CPU cache sizes and the strategy which will be used
- *
- * @param[in] ci CPU information.
- * @param[in] M M dimension.
- * @param[in] N N dimension.
- * @param[in] K K dimension.
- *
- * @return Recommended block sizes to use for the given M, N, K dimensions.
- */
-template <typename strategy>
-BlockSizes calculate_block_sizes(const CPUInfo &ci, unsigned int M, unsigned int N, unsigned int K)
-{
- BlockSizes bs;
-
- using Toi = typename strategy::operand_type;
-
- const unsigned int L1_size = ci.get_L1_cache_size();
- const unsigned int L2_size = ci.get_L2_cache_size();
-
- // Work out blocking parameters
-
- // k_block: Find out how much of the larger array can be loaded into half the cache.
- // This should account for associative caches.
- bs.k_block = (L1_size / 2) / (sizeof(Toi) * (std::max(strategy::out_width(), strategy::out_height())));
-
- // Needs to be (at least a single) multiple of the K unroll level.
- bs.k_block /= strategy::k_unroll();
- bs.k_block = std::max(bs.k_block, 1U) * strategy::k_unroll();
-
- // Now tune to presented problem size; this is how many blocks we need.
- int num_k_blocks = DIV_CEIL(K, bs.k_block);
-
- // So divide the space equally into that many blocks.
- bs.k_block = DIV_CEIL(K, num_k_blocks);
-
- // And round UP to the K unroll level required.
- bs.k_block = ceil_to_multiple(bs.k_block, strategy::k_unroll());
-
- // x_block: Work out how many rows (of length k_block) will fit in the L2
- // Don't allocate more than 90% of the L2 to allow for overheads, and subtract off the L1 contents.
- bs.x_block = (((L2_size * 9) / 10) - (bs.k_block * sizeof(Toi) * (strategy::out_width() + strategy::out_height()))) / (sizeof(Toi) * bs.k_block);
-
- // Needs to be (at least a single) multiple of the kernel output width.
- bs.x_block /= strategy::out_width();
- bs.x_block = std::max(bs.x_block, 1U) * strategy::out_width();
-
- // And tune to the presented problem size.
- int num_x_blocks = DIV_CEIL(N, bs.x_block);
- bs.x_block = DIV_CEIL(N, num_x_blocks);
-
- bs.x_block = ceil_to_multiple(bs.x_block, strategy::out_width());
-
- // Work out the rounded size of M - needed for some buffers.
- bs.m_round = ceil_to_multiple(M, strategy::out_height());
- bs.strategy_out_height = strategy::out_height();
-
- return bs;
-}
-
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_ASSEMBLY_HELPERS_H */
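To show the "strategy" contract calculate_block_sizes relies on, here is a hedged sketch; ToyStrategy is hypothetical and merely stands in for a real assembly kernel strategy.

// Hypothetical strategy satisfying the members used by calculate_block_sizes.
struct ToyStrategy
{
    using operand_type = float;
    static unsigned int out_width()  { return 8; } // columns produced per kernel pass
    static unsigned int out_height() { return 6; } // rows produced per kernel pass
    static unsigned int k_unroll()   { return 1; } // K-dimension unroll factor
};

// Usage (assuming a populated CPUInfo named ci and problem sizes M, N, K):
//   arm_compute::BlockSizes bs =
//       arm_compute::calculate_block_sizes<ToyStrategy>(ci, M, N, K);
//   // bs.k_block and bs.x_block then drive the cache-friendly loop tiling.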
diff --git a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
deleted file mode 100644
index f152ab5f61..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H
-#define ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Common interface for all the arm_gemm Gemms
- */
-class INEGEMMWrapperKernel : public INEKernel
-{
-public:
- /** Parameters defining the dimensions of the matrices being multiplied */
- struct Params
- {
- unsigned int M{ 0 }; /**< Rows in output matrix C (and input matrix A). */
- unsigned int N{ 0 }; /**< Columns in output matrix C (and input matrix B). */
- unsigned int K{ 0 }; /**< Columns of input matrix A (= rows of input matrix B). */
- unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */
- unsigned int multis{ 0 }; /**< Number of "multi" GEMMs (unique A, B and C). */
- };
-
- static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info);
-
- /** Constructor */
- INEGEMMWrapperKernel();
- /** Prevent instances of this class from being copied */
- INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete;
- /** Prevent instances of this class from being copied */
- INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete;
- /** Allow instances of this class to be moved */
- INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note The input and output tensor must have the same dimensions
- *
- * @param[in] a Input tensor (Matrix A)
- * @param[in] b Input tensor (Matrix B)
- * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p a.
- * @param[in] alpha Scalar multiplier to apply to AB matrix product.
- * @param[in] beta Scalar multiplier to apply to input C matrix before adding product.
- * @param[in] gemm_info GEMM meta-data
- */
- void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-protected:
- /** Called as part of configure() after _a, _b, _c and _params have been set.
- *
- * @param[in] alpha Scalar multiplier to apply to AB matrix product.
- * @param[in] beta Scalar multiplier to apply to input C matrix before adding product.
- *
- * @return A 3D execution window.
- */
- virtual Window configure_internal(float alpha, float beta) = 0;
-
- /** Run the kernel from the start to the end offset in window.
- *
- * @param[in] window Window to use for the iteration
- * @param[in] start_offset Where to start iterating from (In Window coordinates)
- * @param[in] end_offset Where to stop iterating (In Window coordinates).
- * @param[in] info Info about executing thread and CPU.
- */
- virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0;
-
- const ITensor *_a;
- const ITensor *_b;
- ITensor *_c;
- Params _params;
- GEMMInfo _gemm_info;
-
-private:
- Window _window3d;
- TensorShape _window_shape;
-};
-
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H */
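A small sketch of filling the Params struct by hand; the sizes are made-up illustrative values, and in practice extract_parameters() derives them from the tensors.

// Illustrative only: describe a single, non-batched 128x96x64 GEMM.
arm_compute::INEGEMMWrapperKernel::Params p;
p.M       = 128; // rows of C and A
p.N       = 96;  // columns of C and B
p.K       = 64;  // columns of A, rows of B
p.batches = 1;
p.multis  = 1;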
diff --git a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
deleted file mode 100644
index 8a9fb82b4a..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
-#define ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-
-#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** This class is a wrapper for the depthwise convolution assembly kernels. */
-class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseConvolutionAssemblyKernelWrapper";
- }
-
- /** Default constructor */
- NEDepthwiseConvolutionAssemblyKernelWrapper()
- : _kernel(nullptr)
- {
- }
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
- /** Default Move Constructor. */
- NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
- /** Default move assignment operator */
- NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] kernel Pointer to an assembly kernel implementation.
- */
- void configure(depthwise::IDepthwiseConvolution *kernel)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel)));
- _kernel = kernel;
- Window win;
- win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1));
- INEKernel::configure(win);
- }
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- auto first = window.x().start();
- auto last = window.x().end();
- _kernel->run(first, last, info.thread_id);
- }
-
-private:
- depthwise::IDepthwiseConvolution *_kernel;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */
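A hedged usage sketch for the wrapper above; "dwc" is assumed to be a configured depthwise::IDepthwiseConvolution obtained from the depthwise backend.

// Sketch only: expose an assembly depthwise kernel as an INEKernel.
void wrap_depthwise(depthwise::IDepthwiseConvolution *dwc)
{
    arm_compute::NEDepthwiseConvolutionAssemblyKernelWrapper wrapper;
    wrapper.configure(dwc); // exposes dwc->get_window() as a 1D window on DimX

    // A scheduler can now split the [start, end) range of DimX across
    // threads; each slice is forwarded to dwc->run() with the thread id.
}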
diff --git a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h
deleted file mode 100644
index 0e3dd74577..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
-#define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
-
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-
-#include "gemm_common.hpp"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** This class is a wrapper for the assembly kernels.
- *
- * Some kernels were written in assembly and highly optimised for specific CPUs like A53 or A55.
- * This class works as a wrapper for these assembly kernels. The arm compute library creates an instance
- * of NEGEMMAssemblyWrapperKernel and other auxiliary data structures to execute a single assembly kernel
- * in the context of an NEFunction.
- *
- * The types TypeInput and TypeOutput are those of the actual kernel implemented in assembly, which is
- * of type template<typename To, typename Tr> class GemmCommon.
- */
-template <typename TypeInput, typename TypeOutput>
-class NEGEMMAssemblyWrapperKernel final : public INEKernel
-{
-public:
- /** Constructor
- */
- NEGEMMAssemblyWrapperKernel()
- : _kernel(nullptr), _name("NEGEMMAssemblyWrapperKernel")
- {
- }
-
- NEGEMMAssemblyWrapperKernel(NEGEMMAssemblyWrapperKernel &) = delete;
- NEGEMMAssemblyWrapperKernel(NEGEMMAssemblyWrapperKernel &&) = default;
- NEGEMMAssemblyWrapperKernel &operator=(NEGEMMAssemblyWrapperKernel &) = delete;
-
- const char *name() const override
- {
- return _name.c_str();
- }
-
- void run(const Window &window, const ThreadInfo &info) override
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-
- auto win=arm_gemm::to_ndcoord(window);
-
- arm_gemm::ndcoord_t thread_locator { };
-
- _kernel->execute(win, thread_locator, info.thread_id);
- }
-
- // Inherited methods overridden:
- void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator) override
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-
- //convert between arm_compute and arm_gemm types
- auto ndc_win = arm_gemm::to_ndcoord(window);
- auto ndc_tlc = arm_gemm::to_ndcoord(thread_locator);
-
- _kernel->execute(ndc_win, ndc_tlc, info.thread_id);
- }
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] kernel Pointer to an assembly kernel implementation.
- * @param[in] kernel_name_tag Tag to be appended to the kernel's name.
- */
- void configure(arm_gemm::GemmCommon<TypeInput, TypeOutput> *kernel, std::string kernel_name_tag)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel)));
- _kernel = kernel;
-
- Window win = to_window(kernel->get_window_size());
-
- INEKernel::configure(win);
-
- if(!kernel_name_tag.empty())
- {
- _name += "/" + kernel_name_tag;
- }
- }
-
-private:
- arm_gemm::GemmCommon<TypeInput, TypeOutput> *_kernel;
- std::string _name;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H */
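A hedged sketch of wrapping an arm_gemm kernel for the NEON scheduler; "gemm" is assumed to be a live float GemmCommon instance obtained from the arm_gemm factory functions.

// Sketch only: turn an arm_gemm kernel into a schedulable INEKernel.
void wrap_assembly_gemm(arm_gemm::GemmCommon<float, float> *gemm)
{
    arm_compute::NEGEMMAssemblyWrapperKernel<float, float> wrapper;
    wrapper.configure(gemm, "sgemm"); // the tag is appended to the kernel name

    // The wrapper is now a regular INEKernel, so a scheduler such as
    // NEScheduler::get().schedule(&wrapper, Window::DimX) can dispatch it.
}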
diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
deleted file mode 100644
index 7723224ec8..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-#include <memory>
-#include <cstring>
-
-#include "arm_gemm_local.hpp"
-#include "gemm_common.hpp"
-
-namespace arm_gemm {
-
-enum class GemmMethod
-{
- DEFAULT,
- GEMV_BATCHED,
- GEMV_PRETRANSPOSED,
- GEMV_NATIVE_TRANSPOSED,
- GEMM_NATIVE,
- GEMM_HYBRID,
- GEMM_INTERLEAVED,
- GEMM_INTERLEAVED_2D,
- QUANTIZE_WRAPPER,
- GEMM_HYBRID_QUANTIZED
-};
-
-struct KernelDescription
-{
- GemmMethod method = GemmMethod::DEFAULT;
- std::string name = "";
- bool is_default = false;
-
- KernelDescription(GemmMethod m, std::string n, bool d=false) : method(m), name(n), is_default(d) { }
- KernelDescription() noexcept { }
-};
-
-struct GemmConfig
-{
- GemmMethod method = GemmMethod::DEFAULT;
- std::string filter = "";
- unsigned int inner_block_size = 0;
- unsigned int outer_block_size = 0;
-
- GemmConfig(GemmMethod method) : method(method) { }
- GemmConfig() { }
-};
-
-struct Activation
-{
- enum class Type {
- None,
- ReLU,
- BoundedReLU
- };
-
- Type type;
- float param1;
- float param2;
-
- Activation(Type type=Type::None, float p1=0.0f, float p2=0.0f) : type(type), param1(p1), param2(p2) { }
-};
-
-struct GemmArgs
-{
-public:
- const CPUInfo *_ci;
- unsigned int _Msize;
- unsigned int _Nsize;
- unsigned int _Ksize;
- unsigned int _nbatches;
- unsigned int _nmulti;
- bool _trA;
- bool _trB;
- Activation _act;
- int _maxthreads;
- bool _pretransposed_hint;
- const GemmConfig *_cfg;
-
- GemmArgs(const CPUInfo *ci, const unsigned int M, const unsigned int N,
- const unsigned int K, const unsigned int nbatches,
- const unsigned int nmulti, const bool trA, const bool trB,
- Activation act, const int maxthreads,
- const bool pretransposed_hint, const GemmConfig *cfg=nullptr ) :
- _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _nbatches(nbatches), _nmulti(nmulti),
- _trA(trA), _trB(trB), _act(act), _maxthreads(maxthreads),
- _pretransposed_hint(pretransposed_hint), _cfg(cfg)
- {
- }
-};
-
-struct Requantize32
-{
-public:
- const int32_t *bias = nullptr;
- size_t bias_multi_stride = 0;
- int32_t a_offset = 0;
- int32_t b_offset = 0;
- int32_t c_offset = 0;
- bool per_channel_requant = false;
- int32_t per_layer_shift = 0;
- int32_t per_layer_mul = 0;
- const int32_t *per_channel_shifts = nullptr;
- const int32_t *per_channel_muls = nullptr;
- int32_t minval = 0;
- int32_t maxval = 0;
-
- Requantize32() = default;
-
- // Constructor for per-tensor quantization
- Requantize32(const int32_t *bias, size_t bias_multi_stride,
- int32_t a_offset, int32_t b_offset, int32_t c_offset,
- int32_t requant_shift, int32_t requant_mul,
- int32_t minv, int32_t maxv) :
- bias(bias), bias_multi_stride(bias_multi_stride),
- a_offset(a_offset), b_offset(b_offset), c_offset(c_offset),
- per_channel_requant(false), per_layer_shift(requant_shift), per_layer_mul(requant_mul),
- minval(minv), maxval(maxv)
- {
- }
-
- // Constructor for per-channel quantization
- Requantize32(const int32_t *bias, size_t bias_multi_stride,
- int32_t a_offset, int32_t b_offset, int32_t c_offset,
- const int32_t *requant_shifts, const int32_t *requant_muls,
- int32_t minv, int32_t maxv) :
- bias(bias), bias_multi_stride(bias_multi_stride),
- a_offset(a_offset), b_offset(b_offset), c_offset(c_offset),
- per_channel_requant(true), per_channel_shifts(requant_shifts), per_channel_muls(requant_muls),
- minval(minv), maxval(maxv)
- {
- }
-};
-
-struct Nothing
-{
-};
-
-template<typename Top, typename Tret>
-using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret> >;
-
-/* Low level API calls.
- * These are implemented as 'GemmArgs' versions, or with the arguments explicitly listed. */
-
-/* get_gemm_method(): Given the templated types and provided parameters,
- * which is the preferred method to implement this GEMM? */
-template<typename Top, typename Tret, class OutputStage = Nothing>
-KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage & ={});
-
-template<typename Top, typename Tret, class OutputStage = Nothing>
-UniqueGemmCommon<Top, Tret> gemm(const GemmArgs &args, const OutputStage & ={});
-
-template<typename Top, typename Tret, class OutputStage = Nothing>
-std::vector<KernelDescription> get_compatible_kernels(const GemmArgs &args, const OutputStage & ={});
-
-} // namespace arm_gemm
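To make the argument structs above concrete, a sketch of describing one quantised GEMM problem; every numeric value is an illustrative assumption, not a recommended setting.

#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp"

void describe_quantized_gemm(const arm_gemm::CPUInfo &ci)
{
    const arm_gemm::Activation relu(arm_gemm::Activation::Type::ReLU);
    const arm_gemm::GemmArgs args(&ci, /*M*/ 128, /*N*/ 96, /*K*/ 64,
                                  /*nbatches*/ 1, /*nmulti*/ 1,
                                  /*trA*/ false, /*trB*/ false,
                                  relu, /*maxthreads*/ 4,
                                  /*pretransposed_hint*/ false);

    // Per-tensor requantisation: one shift/multiplier pair for the layer.
    const arm_gemm::Requantize32 qp(/*bias*/ nullptr, /*bias_multi_stride*/ 0,
                                    /*a_offset*/ 10, /*b_offset*/ -5, /*c_offset*/ 0,
                                    /*requant_shift*/ 8, /*requant_mul*/ 1 << 20,
                                    /*minv*/ -128, /*maxv*/ 127);

    // The factory calls above then take (args, qp), e.g.
    // arm_gemm::get_gemm_method<int8_t, int8_t, arm_gemm::Requantize32>(args, qp);
    (void)args; (void)qp;
}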
diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp
deleted file mode 100644
index 6f345c1721..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/Dimensions.h"
-#include "arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp"
-
-#include <cassert>
-
-/* This file contains the mapping between integral types used in arm_compute and arm_gemm.
- * These two codebases both require a degree of separation for the sake of modularity,
- * so each maintains its own types which represent similar information.
- */
-
-namespace arm_gemm {
-
-//we want to unify the maximum number of dimensions used between arm_gemm and the arm compute library
-constexpr std::size_t ndrange_max =
- arm_compute::Dimensions<unsigned int>::num_max_dimensions;
-
-using ndrange_t=NDRange<ndrange_max>;
-using ndcoord_t=NDCoordinate<ndrange_max>;
-
-/* Converts an `arm_gemm::ndrange_t` to an `arm_compute::Window`
- *
- * As `NDRange<T>` does not encode start positions, we specify
- * the start to be zero in the produced `arm_compute::Window`
- *
- * @param [ndr] the `arm_gemm::ndrange_t` we wish to convert into a `arm_compute::Window`
- * @returns an `arm_compute::Window` representing the same dimensional ranges as `ndr`
- */
-inline arm_compute::Window to_window(const ndrange_t& ndr) {
- arm_compute::Window win;
-
- for(unsigned int i = 0; i!=ndrange_max; ++i) {
- //populate the window with the dimensions of the NDRange
- win.set(i, arm_compute::Window::Dimension(0, ndr.get_size(i)));
- }
-
- return win;
-}
-
-/*
- * Converts an `arm_gemm::ndcoord_t` to an `arm_compute::Window`
- *
- * @param [ndc] the `arm_gemm::ndcoord_t` we wish to convert into a `arm_compute::Window`
- * @returns an `arm_compute::Window` representing the same dimensional ranges as `ndc`
- */
-inline arm_compute::Window to_window(const ndcoord_t& ndc) {
- arm_compute::Window win;
-
- for(unsigned int i = 0; i!=ndrange_max; ++i) {
- const auto start = ndc.get_position(i);
- const auto size = ndc.get_size(i);
- const auto stop = start + size;
-
- //populate the window with the dimensions of the NDRange
- win.set(i, arm_compute::Window::Dimension(start, stop));
- }
-
- return win;
-}
-
-/** Convert an `arm_compute::Window` to an `arm_gemm::NDRange` of the same max dimensions
- *
- * It should be noted that `arm_compute::Window` specifies a `start()` and an `end()`,
- * whereas `arm_gemm::ndrange_t` only has a size; as a result we store the difference between the two
- *
- * @param [win] the `arm_compute::Window` we want to convert to `arm_gemm::ndrange_t`
- * @return the resultant ndrange_t
- */
-inline ndrange_t to_ndrange(const arm_compute::Window& win) {
- return {
- static_cast<unsigned int>(win[0].end() - win[0].start()),
- static_cast<unsigned int>(win[1].end() - win[1].start()),
- static_cast<unsigned int>(win[2].end() - win[2].start()),
- static_cast<unsigned int>(win[3].end() - win[3].start()),
- static_cast<unsigned int>(win[4].end() - win[4].start()),
- static_cast<unsigned int>(win[5].end() - win[5].start())
- };
-}
-
-/** Convert an `arm_compute::Window` to an `arm_gemm::NDCoord` of the same max dimensions
- *
- * @param [win] the `arm_compute::Window` we want to convert to `arm_gemm::ndcoord_t`
- * @return the resultant ndcoord_t
- */
-inline ndcoord_t to_ndcoord(const arm_compute::Window& win) {
- return {
- { static_cast<unsigned int>(win[0].start()), static_cast<unsigned int>(win[0].end() - win[0].start()) },
- { static_cast<unsigned int>(win[1].start()), static_cast<unsigned int>(win[1].end() - win[1].start()) },
- { static_cast<unsigned int>(win[2].start()), static_cast<unsigned int>(win[2].end() - win[2].start()) },
- { static_cast<unsigned int>(win[3].start()), static_cast<unsigned int>(win[3].end() - win[3].start()) },
- { static_cast<unsigned int>(win[4].start()), static_cast<unsigned int>(win[4].end() - win[4].start()) },
- { static_cast<unsigned int>(win[5].start()), static_cast<unsigned int>(win[5].end() - win[5].start()) }
- };
-}
-
-} //namespace arm_gemm
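A short sketch confirming the two conversions are mutually consistent: converting a Window to an ndcoord_t (which keeps both start and size) and back reproduces its ranges.

// Sketch only, built on the helpers declared above.
inline arm_compute::Window round_trip(const arm_compute::Window &win)
{
    const arm_gemm::ndcoord_t ndc = arm_gemm::to_ndcoord(win);
    return arm_gemm::to_window(ndc); // same [start, end) on every dimension
}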
diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp
deleted file mode 100644
index 8d3db4adf2..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-/* This file is used to configure integration-specific aspects of arm_gemm into ACL */
-
-#include "arm_compute/core/CPP/CPPTypes.h"
-
-namespace arm_gemm
-{
-using CPUModel = arm_compute::CPUModel;
-using CPUInfo = arm_compute::CPUInfo;
-} // namespace arm_gemm
-
-
-
diff --git a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
deleted file mode 100644
index ea9b524e15..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp"
-
-#include <cstddef>
-#include <cassert>
-
-#define UNUSED(x) (void)(x)
-
-namespace arm_gemm {
-
-// Abstract class for the GEMM/GEMV functions.
-//
-// GEMM implementations may be "native" (never require any input
-// permutation), "pretransposed" (require permutation up-front) or require
-// working space (permute as they go along). This interface should support
-// all of them.
-
-// The real GemmCommon class is templated based on the operand and return
-// type. This is an interface class which is independent of those types.
-class IGemmCommon {
-public:
- /* Pass in the pointers to the arrays to be operated on and their
- * strides. This "generic" version uses void *s, the preferred version
- * is the one provided by templated GemmCommon (below) which takes
- * appropriately typed pointers. If B is pretransposed (see below) then
- * the settings for B here are ignored.
- */
- virtual void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride,
- const void *B, const int ldb, /* batches share B */ const int B_multi_stride,
- void *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
- const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) = 0;
-
- /** @returns an ndrange containing ranges of the compute space which can be
- * broken up and parallelised over
- */
- virtual ndrange_t get_window_size() const = 0;
-
- /* The maximum thread count is specified when the GEMM is created. Some
- * implementations need to know how many threads will actually run in
- * order to work properly.
- *
- * In some cases, after creating the GEMM the number of threads needs to
- * be reduced (e.g. not enough work to split across threads). This
- * method allows the number of threads actually run to be set (must
- * be equal to or lower than the maximum).
- *
- * This has an empty default implementation, as GEMMs which don't care
- * about thread count can safely ignore this.
- */
- virtual void set_nthreads(int) { };
-
- /* Whether this GEMM can be dynamically scheduled or not. */
- virtual bool supports_dynamic_scheduling() const { return false; }
-
- /** Main execute member function
- * @param [in] work_range specifies the range of work we want to be computed, total range defined by get_window_size()
- * @param [in] thread_locator where we are inside of the thread space
- * @param [in] threadid a unique threadid
- */
- virtual void execute(const ndcoord_t& work_range, const ndcoord_t& thread_locator, int threadid) = 0;
-
- /*** Working space interface (optional) ***/
- /* Total number of bytes of temporary working space needed. If zero, it's not necessary to call set_working_space(). */
- virtual size_t get_working_size() const { return 0; }
- /* Provide working space buffer - the void * passed in must remain allocated for the duration of any execute calls. */
- virtual void set_working_space(void *) { };
-
- /*** "Pretransposed" interface (optional) ***/
- /* Is this object set up for pretranspose? If so, pretranspose_array() needs to be called before execute(); */
- virtual bool B_is_pretransposed() const { return false; }
- /* Does pretranspose still need to be done? */
- virtual bool B_pretranspose_required() const { return false; }
- /* Total number of bytes of space needed for pretransposed arrays. */
- virtual size_t get_B_pretransposed_array_size() const { return 0; }
- /* Perform pretranspose - arguments are output, input, input row stride and input multi stride. */
- /* The "real" version of this depends on the templated operand type (see below). */
- virtual void pretranspose_B_array_generic(void *, const void *, const int, const int) = 0;
- /* Set pretransposed data - the void * passed in must previously have been passed to pretranspose_B_array() for the same or a similar GEMM. */
- virtual void set_pretransposed_B_data(void *) { }
-
- /*** "Quantized bias" interface (optional) ***/
- /* Set the bias vector for quantized GEMMs */
- virtual void set_quantized_bias(const int32_t *bias, size_t bias_multi_stride)
- {
- UNUSED(bias);
- UNUSED(bias_multi_stride);
- }
-
- // Destructor
- virtual ~IGemmCommon() { }
-};
-
-/* "Real" GemmCommon class which is templated on the operand and return types.
- *
- * In addition to correctly typed versions of the functions that operate on
- * operand and return data, this class provides a default implementation of
- * 'set_arrays' to capture the provided arguments in protected class
- * members, as essentially any implementation will need these.
- */
-template<typename To, typename Tr>
-class GemmCommon : public IGemmCommon {
-protected:
- const To *_Aptr=nullptr;
- int _lda=0;
- int _A_batch_stride=0;
- int _A_multi_stride=0;
- const To *_Bptr=nullptr;
- int _ldb=0;
- int _B_multi_stride=0;
- Tr *_Cptr=nullptr;
- int _ldc=0;
- int _C_batch_stride=0;
- int _C_multi_stride=0;
- const Tr *_bias=nullptr;
- int _bias_multi_stride=0;
-
-public:
- /* Pass in the pointers to the arrays to be operated on and their
- * strides (templated version with appropriate types). */
- virtual void set_arrays(const To *A, const int lda, const int A_batch_stride, const int A_multi_stride,
- const To *B, const int ldb, /* batches share B */ const int B_multi_stride,
- Tr *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
- const Tr *bias, /* no row or batch stride needed */ const int bias_multi_stride) {
- _Aptr = A;
- _lda = lda;
- _A_batch_stride = A_batch_stride;
- _A_multi_stride = A_multi_stride;
- _Bptr = B;
- _ldb = ldb;
- _B_multi_stride = B_multi_stride;
- _Cptr = C;
- _ldc = ldc;
- _C_batch_stride = C_batch_stride;
- _C_multi_stride = C_multi_stride;
- _bias = bias;
- _bias_multi_stride = bias_multi_stride;
- }
-
- /* Implementation of the void * overload which casts its arguments to the appropriate type. */
- void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride,
- const void *B, const int ldb, /* batches share B */ const int B_multi_stride,
- void *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
- const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) override {
- set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride,
- static_cast<const To *>(B), ldb, B_multi_stride,
- static_cast<Tr *>(C), ldc, C_batch_stride, C_multi_stride,
- static_cast<const Tr *>(bias), bias_multi_stride);
- }
-
- /*** "Pretransposed" interface ***/
-
- /* Perform pretranspose - the void * passed in must remain allocated for the duration of any execute calls. */
- /* Arguments are: output buffer pointer, source pointer, source row stride, source multi stride */
- virtual void pretranspose_B_array(void *, const To *, const int, const int) { };
-
- /* Implementation of the void * overload which casts its arguments to the appropriate type. */
- void pretranspose_B_array_generic(void *out, const void *in, const int row_stride, const int multi_stride) override {
- pretranspose_B_array(out, static_cast<const To *>(in), row_stride, multi_stride);
- }
-};
-
-template<typename GemmKernel>
-inline unsigned int get_total_window_size(const GemmKernel& kernel)
-{
- auto window=kernel.get_window_size();
-
- unsigned int total = 1;
- for(unsigned i = 0; i != arm_gemm::ndrange_max; ++i)
- {
- total *= window.get_size(i);
- }
-
- return total;
-}
-
-} // namespace arm_gemm
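A hedged sketch of the call sequence a driver performs on a GemmCommon; a single batch, single multi, no bias, and one thread are assumptions made for brevity.

template <typename To, typename Tr>
void run_gemm_once(arm_gemm::GemmCommon<To, Tr> &gemm,
                   const To *A, int lda, const To *B, int ldb, Tr *C, int ldc)
{
    // 1. Tell the kernel where its operands live.
    gemm.set_arrays(A, lda, /*A_batch_stride*/ 0, /*A_multi_stride*/ 0,
                    B, ldb, /*B_multi_stride*/ 0,
                    C, ldc, /*C_batch_stride*/ 0, /*C_multi_stride*/ 0,
                    /*bias*/ nullptr, /*bias_multi_stride*/ 0);

    // 2. Kernels that need scratch advertise it via get_working_size();
    //    zero means set_working_space() can be skipped.

    // 3. Execute the whole compute window on thread 0.
    const auto window = gemm.get_window_size();
    gemm.execute(arm_gemm::to_ndcoord(arm_gemm::to_window(window)),
                 arm_gemm::ndcoord_t{}, /*threadid*/ 0);
}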
diff --git a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp b/arm_compute/core/NEON/kernels/convolution/common/activation.hpp
deleted file mode 100644
index 091b1652c9..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-namespace neon_convolution_kernels
-{
-
-enum class ActivationFunction
-{
- None,
- ReLU,
- ReLU6,
-};
-
-}
diff --git a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp b/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp
deleted file mode 100644
index 799e95d3e6..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#ifdef ALLOC_ALIGN
-#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x)
-#else
-#define ALLOCATE(x) malloc(x)
-#endif
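A usage sketch for the macro above; the alignment of 64 and the buffer size are illustrative.

#include <cstdlib>

void alloc_example()
{
    // With -DALLOC_ALIGN=64 this expands to aligned_alloc(64, 256),
    // otherwise to malloc(256). Note aligned_alloc requires the size to
    // be a multiple of the alignment, so round requests up accordingly.
    float *buffer = static_cast<float *>(ALLOCATE(64 * sizeof(float)));
    // ... use buffer ...
    free(buffer);
}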
diff --git a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp b/arm_compute/core/NEON/kernels/convolution/common/arm.hpp
deleted file mode 100644
index 90e7828553..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/** Sets the macro __arm_any__ if compiling for AArch32 or AArch64.
- * Includes `arm_neon.h` if compiling for either architecture.
- */
-
-#ifdef __arm__
-#define __arm_any__
-#endif // __arm__
-
-#ifdef __aarch64__
-#define __arm_any__
-#endif // __aarch64__
-
-#ifdef __arm_any__
-#include <arm_neon.h>
-#endif // __arm_any__
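A small sketch of the intended use: guard NEON-specific code on the helper macro, relying on this header to pull in arm_neon.h when either architecture is targeted.

#ifdef __arm_any__
float32x4_t splat_four(float v)
{
    return vdupq_n_f32(v); // broadcast v into all four lanes
}
#endif // __arm_any__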
diff --git a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp b/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp
deleted file mode 100644
index 2ab2597785..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-enum PaddingType {
- PADDING_SAME, PADDING_VALID
-};
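The enum only names the two schemes; the output-size rules usually attached to them (the TensorFlow-style convention, which the explicit before/after padding parameters elsewhere in this code appear to realise) are an assumption here, sketched below:

    // Assumed SAME/VALID semantics; the enum itself does not define them.
    enum PaddingType { PADDING_SAME, PADDING_VALID };   // redeclared so this stands alone

    inline int output_dim(PaddingType pad, int in_size, int kernel, int stride)
    {
        if (pad == PADDING_SAME)
            return (in_size + stride - 1) / stride;     // ceil(in / stride)
        return (in_size - kernel + stride) / stride;    // ceil((in - kernel + 1) / stride)
    }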
diff --git a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp b/arm_compute/core/NEON/kernels/convolution/common/padding.hpp
deleted file mode 100644
index 97b21e0ff5..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <cstddef>
-
-// Utilities for copying tensor tiles and adding/removing padding.
-namespace padding
-{
-
-/** Copy a tile and apply padding to the output copy.
- */
-template <typename T>
-void copy_and_pad_tile(
- unsigned int tile_rows,
- unsigned int tile_cols,
- unsigned int n_channels,
- const T *inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- T* outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride,
- unsigned int pad_top,
- unsigned int pad_left,
- unsigned int pad_bottom,
- unsigned int pad_right,
- T pad_value=static_cast<T>(0)
-);
-
-/** Copy a tile and remove padding elements in the output.
- */
-template <unsigned int TileRows, unsigned int TileCols>
-class CopyCropped
-{
- public:
- static void execute(
- size_t size, // Amount of data to copy
- const void *inptr,
- size_t in_row_stride,
- size_t in_col_stride,
- void *outptr,
- size_t out_row_stride,
- size_t out_col_stride,
- unsigned int pad_top,
- unsigned int pad_left,
- unsigned int pad_bottom,
- unsigned int pad_right
- );
-};
-
-template <typename T>
-void crop_and_copy_tile(
- unsigned int tile_rows,
- unsigned int tile_cols,
- unsigned int n_channels,
- const T *inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- T *outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride,
- unsigned int crop_top,
- unsigned int crop_left,
- unsigned int crop_bottom,
- unsigned int crop_right
-);
-
-}
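Only declarations are deleted here; as a scalar reference for what `copy_and_pad_tile`'s signature implies, the sketch below copies an (rows x cols x channels) tile and surrounds it with padding. It is written under assumptions (dense channel-innermost layout, strides in elements) and uses a hypothetical name, not the library's implementation:

    template <typename T>
    void pad_tile_ref(unsigned rows, unsigned cols, unsigned channels,
                      const T *in, unsigned in_row_stride, unsigned in_col_stride,
                      T *out, unsigned out_row_stride, unsigned out_col_stride,
                      unsigned pad_top, unsigned pad_left,
                      unsigned pad_bottom, unsigned pad_right, T pad_value = T(0))
    {
        const unsigned out_rows = pad_top + rows + pad_bottom;
        const unsigned out_cols = pad_left + cols + pad_right;
        for (unsigned i = 0; i < out_rows; i++)
        {
            for (unsigned j = 0; j < out_cols; j++)
            {
                T *o = out + i * out_row_stride + j * out_col_stride;
                const bool interior = i >= pad_top && i < pad_top + rows &&
                                      j >= pad_left && j < pad_left + cols;
                if (!interior)
                {
                    // Border element: write the pad value across all channels.
                    for (unsigned c = 0; c < channels; c++) o[c] = pad_value;
                }
                else
                {
                    // Interior element: copy the corresponding input pixel.
                    const T *p = in + (i - pad_top) * in_row_stride
                                    + (j - pad_left) * in_col_stride;
                    for (unsigned c = 0; c < channels; c++) o[c] = p[c];
                }
            }
        }
    }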
diff --git a/arm_compute/core/NEON/kernels/convolution/common/perf.h b/arm_compute/core/NEON/kernels/convolution/common/perf.h
deleted file mode 100644
index 3c0d36646d..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/perf.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-/* Prototypes from perf.c */
-
-void start_counter(int fd);
-long long get_counter(int fd);
-long long stop_counter(int fd);
-int open_instruction_counter(void);
-int open_cycle_counter(void);
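perf.c itself is not part of this diff, so the bodies behind these prototypes are not visible here. As a hedged sketch only: on Linux such counters are conventionally built on perf_event_open, roughly as below; the actual file may differ.

    // Linux-only sketch; perf_event_open has no libc wrapper, hence syscall().
    #include <linux/perf_event.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstring>

    static int open_counter(unsigned long long config)
    {
        struct perf_event_attr attr;
        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_HARDWARE;
        attr.size = sizeof(attr);
        attr.config = config;            // e.g. PERF_COUNT_HW_CPU_CYCLES
        attr.disabled = 1;               // created stopped; enabled by start_counter
        attr.exclude_kernel = 1;
        attr.exclude_hv = 1;
        return (int) syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

    int open_cycle_counter(void)       { return open_counter(PERF_COUNT_HW_CPU_CYCLES); }
    int open_instruction_counter(void) { return open_counter(PERF_COUNT_HW_INSTRUCTIONS); }

    void start_counter(int fd)
    {
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
    }

    long long get_counter(int fd)
    {
        long long count = 0;
        read(fd, &count, sizeof(count));
        return count;
    }

    long long stop_counter(int fd)
    {
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        return get_counter(fd);
    }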
diff --git a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp
deleted file mode 100644
index 6029cb67e3..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include <cstdint>
-
-namespace qasymm8
-{
-
-struct QAsymm8Params
-{
- uint8_t quantize(float value) const;
- float dequantize(uint8_t value) const;
-
- uint8_t offset;
- float scale;
-};
-
-struct QAsymm8RescaleParams
-{
- static QAsymm8RescaleParams make_rescale_params(
- const QAsymm8Params& weight_quant,
- const QAsymm8Params& input_quant,
- const QAsymm8Params& output_quant
- );
-
- QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale);
-
- const int32_t shift, multiplier;
- const float rescale;
-};
-
-}
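`quantize` and `dequantize` are only declared here. A plausible definition, following the standard affine (asymmetric) scheme real = scale * (q - offset), would be the sketch below; these free functions are illustrative, not the library's code:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Affine quantization: real = scale * (quantized - offset).
    uint8_t quantize_ref(float value, float scale, uint8_t offset)
    {
        const int q = static_cast<int>(std::lround(value / scale)) + offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));  // clamp to uint8 range
    }

    float dequantize_ref(uint8_t value, float scale, uint8_t offset)
    {
        return scale * (static_cast<int>(value) - static_cast<int>(offset));
    }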
diff --git a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp
deleted file mode 100644
index 41bfbe4d8a..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include <cstdint>
-#include <vector>
-#include "qasymm8.hpp"
-
-
-namespace qsymm8 {
-
-struct QSymm8Params {
- int8_t quantize(float value) const;
- float dequantize(int8_t value) const;
-
- float scale;
-};
-
-struct QSymm8RescaleParams {
- static QSymm8RescaleParams
- make_rescale_params(const QSymm8Params &weight_quant,
- const QSymm8Params &input_quant,
- const QSymm8Params &output_quant);
-
- QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale);
-
- const int32_t shift, multiplier;
- const float rescale;
-};
-
-struct QSymm8PerChannelParams {
- int8_t quantize(float value, float scale) const;
- float dequantize(int8_t value, float scale) const;
-
- std::vector<float> scales;
-};
-
-struct QSymm8PerChannelRescaleParams {
- static QSymm8PerChannelRescaleParams
- make_rescale_params(const QSymm8PerChannelParams &weight_quant,
- const QSymm8PerChannelParams &input_quant,
- const QSymm8PerChannelParams &output_quant);
-
- static QSymm8PerChannelRescaleParams
- make_rescale_params(const QSymm8PerChannelParams &weight_quant,
- const qasymm8::QAsymm8Params &input_quant,
- const qasymm8::QAsymm8Params &output_quant);
-
- QSymm8PerChannelRescaleParams(std::vector<int32_t>& shift, std::vector<int32_t>& multiplier, std::vector<float>& rescale);
-
- std::vector<int32_t> shifts, multipliers;
- std::vector<float> rescales;
-};
-
-} // namespace qsymm8
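Symmetric quantization drops the offset (real = scale * q), and the per-channel variant keys the scale on the channel index. A sketch of the per-channel round trip under that assumption (hypothetical helper names):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Symmetric per-channel quantization: real = scales[channel] * q.
    int8_t quantize_per_channel(float value, const std::vector<float> &scales, std::size_t channel)
    {
        const int q = static_cast<int>(std::lround(value / scales[channel]));
        return static_cast<int8_t>(std::min(127, std::max(-128, q)));  // clamp to int8 range
    }

    float dequantize_per_channel(int8_t value, const std::vector<float> &scales, std::size_t channel)
    {
        return scales[channel] * value;
    }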
diff --git a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp b/arm_compute/core/NEON/kernels/convolution/common/shims.hpp
deleted file mode 100644
index 243d305e19..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp
+++ /dev/null
@@ -1,749 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#ifndef DOXYGEN_SKIP_THIS
-#include <cstdint>
-#endif /* DOXYGEN_SKIP_THIS */
-#include "arm.hpp"
-
-namespace reorder {
-/** Re-order a tensor from NCHW format to NHWC.
- *
- * @note The stride parameters are optional and are provided to allow padding in either input or output tensors.
- *
- * @param[in] in Input tensor in NCHW format.
- * @param[out] out Output tensor, to be written in NHWC format.
- * @param n_batches Number of batches in the tensors.
- * @param n_channels Number of channels in the tensors
- * @param n_rows Height of the tensor
- * @param n_cols Width of the tensor
- * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`.
- * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`.
- * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`.
- * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`.
- * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`.
- * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`.
- */
-template <typename T>
-inline void nchw_to_nhwc(
- const T* const in,
- T* const out,
- const int n_batches,
- const int n_channels,
- const int n_rows,
- const int n_cols,
- int in_batch_stride=0,
- int in_channel_stride=0,
- int in_row_stride=0,
- int out_batch_stride=0,
- int out_row_stride=0,
- int out_col_stride=0
-);
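A minimal usage sketch: leaving every stride at its default of 0 selects the dense strides documented above, so re-ordering a dense 1x2x2x3 (NCHW) tensor looks like this (assuming this header is in scope; buffer contents are illustrative):

    void example()
    {
        float in[1 * 2 * 2 * 3]  = {0};   // channel 0 plane, then channel 1 plane
        float out[1 * 2 * 3 * 2];
        // Zero (defaulted) strides request the dense defaults documented above.
        reorder::nchw_to_nhwc(in, out, /*n_batches=*/1, /*n_channels=*/2,
                              /*n_rows=*/2, /*n_cols=*/3);
    }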
-
-/** Re-order a tensor from NHWC format to NCHW.
- *
- * @note The stride parameters are optional and are provided to allow padding in either input or output tensors.
- *
- * @param[in] in Input tensor in NHWC format.
- * @param[out] out Output tensor, to be written in NCHW format.
- * @param n_batches Number of batches in the tensors.
- * @param n_rows Height of the tensor
- * @param n_cols Width of the tensor
- * @param n_channels Number of channels in the tensors
- * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`.
- * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`.
- * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`.
- * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`.
- * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`.
- * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`.
- */
-template <typename T>
-inline void nhwc_to_nchw(
- const T* const in, // Input data in NHWC form
- T* const out, // Output data in NCHW form
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_channels,
- int in_batch_stride=0,
- int in_row_stride=0,
- int in_col_stride=0,
- int out_batch_stride=0,
- int out_channel_stride=0,
- int out_row_stride=0
-);
-
-/** Re-order a weight tensor from [Output feature map x Input feature map x
- * Height x Width] format to [Height x Width x Input feature map x Output
- * feature map] format.
- */
-template <typename T>
-inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
- const T* const in, // Input in [Output x Input x Height x Width] form
- T* const out, // Output in [Height x Width x Input x Output] form
- const int n_output_feature_maps,
- const int n_input_feature_maps,
- const int n_rows,
- const int n_cols,
- int in_output_feature_map_stride=0,
- int in_input_feature_map_stride=0,
- int in_row_stride=0,
- int out_row_stride=0,
- int out_col_stride=0,
- int out_input_feature_map_stride=0
-);
-
-/** Re-order a weight tensor from [Height x Width x Input feature map x Output
- * feature map] format to [Output feature map x Input feature map x Height x
- * Width] format.
- */
-template <typename T>
-inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
- const T* const in, // Input in [Height x Width x Input x Output] form
- T* const out, // Output in [Output x Input x Height x Width] form
- const int n_rows,
- const int n_cols,
- const int n_input_feature_maps,
- const int n_output_feature_maps,
- int in_row_stride=0,
- int in_col_stride=0,
- int in_input_feature_map_stride=0,
- int out_output_feature_map_stride=0,
- int out_input_feature_map_stride=0,
- int out_row_stride=0
-);
-
-/*****************************************************************************/
-/* 32-bit implementation : NCHW -> NHWC
- */
-template <>
-inline void nchw_to_nhwc(
- const int32_t* const in,
- int32_t* const out,
- const int n_batches,
- const int n_channels,
- const int n_rows,
- const int n_cols,
- int in_batch_stride,
- int in_channel_stride,
- int in_row_stride,
- int out_batch_stride,
- int out_row_stride,
- int out_col_stride
-)
-{
- typedef int32_t T;
-
- // Fill in the stride values
- in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
- in_channel_stride = (in_channel_stride) ? in_channel_stride
- : n_rows * in_row_stride;
- in_batch_stride = (in_batch_stride) ? in_batch_stride
- : n_channels * in_channel_stride;
-
- out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
- out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
- out_batch_stride = (out_batch_stride) ? out_batch_stride
- : n_rows * out_row_stride;
-
- // Perform the re-ordering
- for (int n = 0; n < n_batches; n++)
- {
- const T* const in_batch = in + n*in_batch_stride;
- T* const out_batch = out + n*out_batch_stride;
-
- for (int i = 0; i < n_rows; i++)
- {
- const T* const in_row = in_batch + i*in_row_stride;
- T* const out_row = out_batch + i*out_row_stride;
-
- int j = 0, j_remaining = n_cols;
-#ifdef __arm_any__
- for (; j_remaining >= 4; j += 4, j_remaining -= 4)
- {
- int c = 0, c_remaining = n_channels;
- for (; c_remaining >= 4; c += 4, c_remaining -= 4)
- {
- // Read 4 channels worth of 4 columns, then zip to produce 4 columns
- // worth of 4 channels.
- int32x4_t channel_pixels[4];
- channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j);
- channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j);
- channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j);
- channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j);
-
- const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]);
- const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]);
- const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]);
- const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]);
-
- vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]);
- vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]);
- vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]);
- vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]);
- }
- for (; c_remaining; c++, c_remaining--)
- {
- for (int _j = 0; _j < 4; _j++)
- {
- const T* const in_col = in_row + j + _j;
- T* const out_col = out_row + (j + _j)*out_col_stride;
- const T* const in_channel = in_col + c*in_channel_stride;
- out_col[c] = *(in_channel);
- }
- }
- }
- for (; j_remaining >= 2; j += 2, j_remaining -= 2)
- {
- int c = 0, c_remaining = n_channels;
- for (; c_remaining >= 2; c += 2, c_remaining -= 2)
- {
- // Read 2 channels worth of 2 columns, then zip to produce 2 columns
- // worth of 2 channels.
- int32x2_t channel_pixels[2];
- channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j);
- channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j);
-
- const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]);
-
- vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]);
- vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]);
- }
- for (; c_remaining; c++, c_remaining--)
- {
- for (int _j = 0; _j < 2; _j++)
- {
- const T* const in_col = in_row + j + _j;
- T* const out_col = out_row + (j + _j)*out_col_stride;
- const T* const in_channel = in_col + c*in_channel_stride;
- out_col[c] = *(in_channel);
- }
- }
- }
-#endif // __arm_any__
- for (; j_remaining; j++, j_remaining--)
- {
- const T* const in_col = in_row + j;
- T* const out_col = out_row + j*out_col_stride;
-
- for (int c = 0; c < n_channels; c++)
- {
- const T* const in_channel = in_col + c*in_channel_stride;
- out_col[c] = *(in_channel);
- }
- }
- }
- }
-}
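The two-stage zip in the vectorised loop above is a 4x4 transpose held entirely in registers. Written out as a standalone sketch of the same trick (names are illustrative):

    #include <arm_neon.h>

    // Transpose a 4x4 block of int32 held in four vectors: rows in, columns out.
    // Same two-stage vzipq_s32 pattern as the loop above.
    inline void transpose4x4_s32(int32x4_t r[4])
    {
        const int32x4x2_t z02 = vzipq_s32(r[0], r[2]);  // {r0.0,r2.0,r0.1,r2.1}, {r0.2,r2.2,r0.3,r2.3}
        const int32x4x2_t z13 = vzipq_s32(r[1], r[3]);
        const int32x4x2_t c01 = vzipq_s32(z02.val[0], z13.val[0]);
        const int32x4x2_t c23 = vzipq_s32(z02.val[1], z13.val[1]);
        r[0] = c01.val[0];  // {r0.0, r1.0, r2.0, r3.0}
        r[1] = c01.val[1];  // {r0.1, r1.1, r2.1, r3.1}
        r[2] = c23.val[0];
        r[3] = c23.val[1];
    }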
-
-template <>
-inline void nchw_to_nhwc(
- const uint32_t* const in,
- uint32_t* const out,
- const int n_batches,
- const int n_channels,
- const int n_rows,
- const int n_cols,
- int in_batch_stride,
- int in_channel_stride,
- int in_row_stride,
- int out_batch_stride,
- int out_row_stride,
- int out_col_stride
-)
-{
- nchw_to_nhwc(
- reinterpret_cast<const int32_t*>(in),
- reinterpret_cast<int32_t*>(out),
- n_batches, n_channels, n_rows, n_cols,
- in_batch_stride, in_channel_stride, in_row_stride,
- out_batch_stride, out_row_stride, out_col_stride
- );
-}
-
-template <>
-inline void nchw_to_nhwc(
- const float* const in,
- float* const out,
- const int n_batches,
- const int n_channels,
- const int n_rows,
- const int n_cols,
- int in_batch_stride,
- int in_channel_stride,
- int in_row_stride,
- int out_batch_stride,
- int out_row_stride,
- int out_col_stride
-)
-{
- nchw_to_nhwc(
- reinterpret_cast<const int32_t*>(in),
- reinterpret_cast<int32_t*>(out),
- n_batches, n_channels, n_rows, n_cols,
- in_batch_stride, in_channel_stride, in_row_stride,
- out_batch_stride, out_row_stride, out_col_stride
- );
-}
-
-/*****************************************************************************/
-/* Generic implementation : NCHW -> NHWC
- */
-template <typename T>
-inline void nchw_to_nhwc(
- const T* const in,
- T* const out,
- const int n_batches,
- const int n_channels,
- const int n_rows,
- const int n_cols,
- int in_batch_stride,
- int in_channel_stride,
- int in_row_stride,
- int out_batch_stride,
- int out_row_stride,
- int out_col_stride
-)
-{
- // Fill in the stride values
- in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
- in_channel_stride = (in_channel_stride) ? in_channel_stride
- : n_rows * in_row_stride;
- in_batch_stride = (in_batch_stride) ? in_batch_stride
- : n_channels * in_channel_stride;
-
- out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
- out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
- out_batch_stride = (out_batch_stride) ? out_batch_stride
- : n_rows * out_row_stride;
-
- // Perform the re-ordering
- for (int n = 0; n < n_batches; n++)
- {
- const T* const in_batch = in + n*in_batch_stride;
- T* const out_batch = out + n*out_batch_stride;
-
- for (int i = 0; i < n_rows; i++)
- {
- const T* const in_row = in_batch + i*in_row_stride;
- T* const out_row = out_batch + i*out_row_stride;
-
- for (int j = 0; j < n_cols; j++)
- {
- const T* const in_col = in_row + j;
- T* const out_col = out_row + j*out_col_stride;
-
- for (int c = 0; c < n_channels; c++)
- {
- const T* const in_channel = in_col + c*in_channel_stride;
- out_col[c] = *(in_channel);
- }
- }
- }
- }
-}
-
-/*****************************************************************************/
-/* 32-bit implementation : NHWC -> NCHW
- */
-template <>
-inline void nhwc_to_nchw(
- const int32_t* const in, // Input data in NHWC form
- int32_t* const out, // Output data in NCHW form
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_channels,
- int in_batch_stride,
- int in_row_stride,
- int in_col_stride,
- int out_batch_stride,
- int out_channel_stride,
- int out_row_stride
-)
-{
- typedef int32_t T;
-
- // Fill in stride values
- in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
- in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
- in_batch_stride = (in_batch_stride) ? in_batch_stride
- : n_rows * in_row_stride;
-
- out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
- out_channel_stride = (out_channel_stride) ? out_channel_stride
- : n_rows * out_row_stride;
- out_batch_stride = (out_batch_stride) ? out_batch_stride
- : n_channels * out_channel_stride;
-
- // Perform the re-ordering
- // For every batch
- for (int n = 0; n < n_batches; n++)
- {
- const T* const in_batch = in + n*in_batch_stride;
- T* const out_batch = out + n*out_batch_stride;
-
- // For every row
- for (int i = 0; i < n_rows; i++)
- {
- const T* const in_i = in_batch + i*in_row_stride;
- T* const out_i = out_batch + i*out_row_stride;
-
- // For every column, beginning with chunks of 4
- int j = 0, j_remaining = n_cols;
-#ifdef __arm_any__
- for (; j_remaining >= 4; j += 4, j_remaining -=4)
- {
- // For every channel, beginning with chunks of 4
- int c = 0, c_remaining = n_channels;
- for (; c_remaining >= 4; c += 4, c_remaining -= 4)
- {
- // Read 4 columns worth of 4 channels then zip to produce 4 channels
- // worth of 4 columns.
- int32x4_t pixel_channels[4];
- pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c);
- pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c);
- pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c);
- pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c);
-
- const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]);
- const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]);
- const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]);
- const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]);
-
- vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]);
- vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]);
- vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]);
- vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]);
- }
- for (; c_remaining; c++, c_remaining--)
- {
- for (int _j = 0; _j < 4; _j++)
- {
- const T* const in_j = in_i + (j + _j)*in_col_stride;
- T* const out_j = out_i + (j + _j);
-
- const T* const in_channel = in_j + c;
- T* const out_channel = out_j + c*out_channel_stride;
- *(out_channel) = *(in_channel);
- }
- }
- }
- for (; j_remaining >= 2; j += 2, j_remaining -=2)
- {
- int c = 0, c_remaining = n_channels;
- for (; c_remaining >= 2; c += 2, c_remaining -= 2)
- {
- // Read 2 columns worth of 2 channels then zip to produce 2 channels
- // worth of 2 columns.
- int32x2_t pixel_channels[2];
- pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c);
- pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c);
-
- const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]);
-
- vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]);
- vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]);
- }
- for (; c_remaining; c++, c_remaining--)
- {
- for (int _j = 0; _j < 2; _j++)
- {
- const T* const in_j = in_i + (j + _j)*in_col_stride;
- T* const out_j = out_i + (j + _j);
-
- const T* const in_channel = in_j + c;
- T* const out_channel = out_j + c*out_channel_stride;
- *(out_channel) = *(in_channel);
- }
- }
- }
-#endif // __arm_any__
- for (; j_remaining; j++, j_remaining--)
- {
- const T* const in_j = in_i + j*in_col_stride;
- T* const out_j = out_i + j;
-
- // For every channel
- for (int c = 0; c < n_channels; c++)
- {
- const T* const in_channel = in_j + c;
- T* const out_channel = out_j + c*out_channel_stride;
- *(out_channel) = *(in_channel);
- }
- }
- }
- }
-}
-
-template <>
-inline void nhwc_to_nchw(
- const uint32_t* const in, // Input data in NHWC form
- uint32_t* const out, // Output data in NCHW form
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_channels,
- int in_batch_stride,
- int in_row_stride,
- int in_col_stride,
- int out_batch_stride,
- int out_channel_stride,
- int out_row_stride
-)
-{
- // Redirect to generic 32-bit implementation
- nhwc_to_nchw(
- reinterpret_cast<const int32_t*>(in),
- reinterpret_cast<int32_t*>(out),
- n_batches, n_rows, n_cols, n_channels,
- in_batch_stride, in_row_stride, in_col_stride,
- out_batch_stride, out_channel_stride, out_row_stride
- );
-}
-
-template <>
-inline void nhwc_to_nchw(
- const float* const in, // Input data in NHWC form
- float* const out, // Output data in NCHW form
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_channels,
- int in_batch_stride,
- int in_row_stride,
- int in_col_stride,
- int out_batch_stride,
- int out_channel_stride,
- int out_row_stride
-)
-{
- // Redirect to generic 32-bit implementation
- nhwc_to_nchw(
- reinterpret_cast<const int32_t*>(in),
- reinterpret_cast<int32_t*>(out),
- n_batches, n_rows, n_cols, n_channels,
- in_batch_stride, in_row_stride, in_col_stride,
- out_batch_stride, out_channel_stride, out_row_stride
- );
-}
-
-/*****************************************************************************/
-/* Generic implementation : NHWC -> NCHW
- */
-template <typename T>
-inline void nhwc_to_nchw(
- const T* const in, // Input data in NHWC form
- T* const out, // Output data in NCHW form
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_channels,
- int in_batch_stride,
- int in_row_stride,
- int in_col_stride,
- int out_batch_stride,
- int out_channel_stride,
- int out_row_stride
-)
-{
- // Fill in stride values
- in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
- in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
- in_batch_stride = (in_batch_stride) ? in_batch_stride
- : n_rows * in_row_stride;
-
- out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
- out_channel_stride = (out_channel_stride) ? out_channel_stride
- : n_rows * out_row_stride;
- out_batch_stride = (out_batch_stride) ? out_batch_stride
- : n_channels * out_channel_stride;
-
- // Perform the re-ordering
- // For every batch
- for (int n = 0; n < n_batches; n++)
- {
- const T* const in_batch = in + n*in_batch_stride;
- T* const out_batch = out + n*out_batch_stride;
-
- // For every row
- for (int i = 0; i < n_rows; i++)
- {
- const T* const in_i = in_batch + i*in_row_stride;
- T* const out_i = out_batch + i*out_row_stride;
-
- // For every column
- for (int j = 0; j < n_cols; j++)
- {
- const T* const in_j = in_i + j*in_col_stride;
- T* const out_j = out_i + j;
-
- // For every channel
- for (int c = 0; c < n_channels; c++)
- {
- const T* const in_channel = in_j + c;
- T* const out_channel = out_j + c*out_channel_stride;
- *(out_channel) = *(in_channel);
- }
- }
- }
- }
-}
-
-/*****************************************************************************/
-/* Generic weight re-order implementation.
- */
-template <typename T>
-inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
- const T* const in, // Input in [Output x Input x Height x Width] form
- T* const out, // Output in [Height x Width x Input x Output] form
- const int n_output_feature_maps,
- const int n_input_feature_maps,
- const int n_rows,
- const int n_cols,
- int in_output_feature_map_stride,
- int in_input_feature_map_stride,
- int in_row_stride,
- int out_row_stride,
- int out_col_stride,
- int out_input_feature_map_stride
-)
-{
- // Fill in stride values
- in_row_stride = (in_row_stride)
- ? in_row_stride
- : n_cols;
- in_input_feature_map_stride = (in_input_feature_map_stride)
- ? in_input_feature_map_stride
- : n_rows * in_row_stride;
- in_output_feature_map_stride = (in_output_feature_map_stride)
- ? in_output_feature_map_stride
- : n_input_feature_maps * in_input_feature_map_stride;
-
- out_input_feature_map_stride = (out_input_feature_map_stride)
- ? out_input_feature_map_stride
- : n_output_feature_maps;
- out_col_stride = (out_col_stride)
- ? out_col_stride
- : n_input_feature_maps * out_input_feature_map_stride;
- out_row_stride = (out_row_stride)
- ? out_row_stride
- : n_cols * out_col_stride;
-
- // Perform the re-ordering
- for (int i = 0; i < n_rows; i++)
- {
- const T* const in_row = in + i * in_row_stride;
- T* out_row = out + i * out_row_stride;
-
- for (int j = 0; j < n_cols; j++)
- {
- const T* const in_col = in_row + j;
- T* const out_col = out_row + j * out_col_stride;
-
- for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
- {
- const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
- T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
-
- for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
- {
- const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride;
- T* const out_ofm = out_ifm + ofm;
- *(out_ofm) = *(in_ofm);
- }
- }
- }
- }
-}
-
-/*****************************************************************************/
-/* Generic weight re-order implementation.
- */
-template <typename T>
-inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
- const T* const in, // Input in [Height x Width x Input x Output] form
- T* const out, // Output in [Output x Input x Height x Width] form
- const int n_rows,
- const int n_cols,
- const int n_input_feature_maps,
- const int n_output_feature_maps,
- int in_row_stride,
- int in_col_stride,
- int in_input_feature_map_stride,
- int out_output_feature_map_stride,
- int out_input_feature_map_stride,
- int out_row_stride
-)
-{
- // Fill in the stride values
- in_input_feature_map_stride = (in_input_feature_map_stride)
- ? in_input_feature_map_stride
- : n_output_feature_maps;
- in_col_stride = (in_col_stride)
- ? in_col_stride
- : n_input_feature_maps * in_input_feature_map_stride;
- in_row_stride = (in_row_stride)
- ? in_row_stride
- : n_cols * in_col_stride;
-
- out_row_stride = (out_row_stride)
- ? out_row_stride
- : n_cols;
- out_input_feature_map_stride = (out_input_feature_map_stride)
- ? out_input_feature_map_stride
- : n_rows * out_row_stride;
- out_output_feature_map_stride = (out_output_feature_map_stride)
- ? out_output_feature_map_stride
- : n_input_feature_maps * out_input_feature_map_stride;
-
- // Perform the re-ordering
- for (int i = 0; i < n_rows; i++)
- {
- const T* const in_row = in + i * in_row_stride;
- T* const out_row = out + i * out_row_stride;
-
- for (int j = 0; j < n_cols; j++)
- {
- const T* const in_col = in_row + j * in_col_stride;
- T* const out_col = out_row + j;
-
- for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
- {
- const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
- T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
-
- for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
- {
- const T* const in_ofm = in_ifm + ofm;
- T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride;
- *(out_ofm) = *(in_ofm);
- }
- }
- }
- }
-}
-
-} // namespace reorder
diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp
deleted file mode 100644
index ad0a677a8f..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include <cstdlib>
-#include <random>
-
-#include "alloc.hpp"
-
-enum TensorOrder
-{
- NHWC, ///< [Batch x Height x Width x Channels]
- NCHW, ///< [Batch x Channels x Height x Width]
-};
-
-struct Tensor4DShape
-{
- int n_batches, n_rows, n_cols, n_channels;
- TensorOrder ordering;
-
- // Create a new tensor with the default (NHWC) ordering
- inline Tensor4DShape(
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_channels,
- const TensorOrder ordering=NHWC
- ) : n_batches(n_batches),
- n_rows(n_rows),
- n_cols(n_cols),
- n_channels(n_channels),
- ordering(ordering)
- {
- }
-
- inline int index(const int n, const int i, const int j, const int c) const
- {
- if (this->ordering == NHWC)
- {
- return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c;
- }
- else // NCHW
- {
- return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j;
- }
- }
-
- inline int size() const
- {
- return n_batches * n_rows * n_cols * n_channels;
- }
-
- inline bool TestEq(const Tensor4DShape& other) const
- {
- return (n_batches == other.n_batches &&
- n_rows == other.n_rows &&
- n_cols == other.n_cols &&
- n_channels == other.n_channels);
- }
-};
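A worked example of the linearisation performed by `index`: for an NHWC shape of 2 batches x 3 rows x 4 cols x 5 channels, element (n=1, i=2, j=3, c=4) lands at ((1*3 + 2)*4 + 3)*5 + 4 = 119, the final element. A sketch assuming this header is included:

    #include <cassert>

    int main()
    {
        const Tensor4DShape shape(2, 3, 4, 5);   // NHWC by default
        assert(shape.size() == 120);
        assert(shape.index(1, 2, 3, 4) == 119);  // last element

        const Tensor4DShape nchw(2, 3, 4, 5, NCHW);
        assert(nchw.index(0, 0, 0, 1) == 12);    // one channel step spans a 3x4 plane
        return 0;
    }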
-
-
-enum WeightOrder
-{
- HWIO, ///< [Height x Width x Input channels x Output channels]
- OIHW, ///< [Output channels x Input channels x Height x Width]
-};
-
-struct KernelShape
-{
- int n_output_channels, n_rows, n_cols, n_input_channels;
- WeightOrder ordering;
-
- inline KernelShape(
- const int n_output_channels,
- const int n_rows,
- const int n_cols,
- const int n_input_channels,
- const WeightOrder ordering=HWIO
- ) : n_output_channels(n_output_channels),
- n_rows(n_rows),
- n_cols(n_cols),
- n_input_channels(n_input_channels),
- ordering(ordering)
- {
- }
-
- inline int index(int oc, int i, int j, int ic) const
- {
- if (this->ordering == HWIO)
- {
- return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc;
- }
- else // OIHW
- {
- return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j;
- }
- }
-
- inline int size(void) const
- {
- return n_output_channels * n_rows * n_cols * n_input_channels;
- }
-};
-
-
-template <typename ShapeT, typename T>
-class Tensor4D final
-{
- public:
- Tensor4D(ShapeT shape) :
- shape(shape),
- _data(reinterpret_cast<T*>(ALLOCATE(size_bytes())))
- {
- Clear();
- }
-
- Tensor4D(const Tensor4D<ShapeT, T>&) = delete;
- Tensor4D operator=(const Tensor4D<ShapeT, T>&) = delete;
-
- ~Tensor4D() {
- free(_data);
- }
-
- inline T* ptr() const {
- return _data;
- }
-
- inline size_t size_bytes() const {
- return shape.size() * sizeof(T);
- }
-
- /** Extract an element of the tensor.
- *
- * If the shape is a Tensor4DShape then the index is given as batch, row,
- * column and channel. If the shape is a KernelShape then the index is
- * given as output channel, row, column and input channel.
- */
- inline T& element(const int a, const int b, const int c, const int d) const
- {
- return _data[shape.index(a, b, c, d)];
- }
-
- inline void Clear() {
- Fill(static_cast<T>(0));
- }
-
- inline void Fill(T val) {
- for (int i = 0; i < shape.size(); i++)
- _data[i] = val;
- }
-
- const ShapeT shape;
-
- private:
- T* const _data;
-};
diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
deleted file mode 100644
index 0c234431b1..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "tensor.hpp"
-
-// Methods to print tensors and weights
-void PrintTensor(const Tensor4D<Tensor4DShape, float>& tensor);
-void PrintWeights(const Tensor4D<KernelShape, float>& weights);
-
-// Test the equivalence of two tensors
-// Counts the instances where |a - b|/|a| > max_err
-bool CmpTensors(
- const Tensor4D<Tensor4DShape, float>& a,
- const Tensor4D<Tensor4DShape, float>& b,
- const float max_err=0.0f
-);
-
-// Fill the tensor with a test pattern
-void TestPattern(Tensor4D<Tensor4DShape, float>& tensor);
-void TestPattern(Tensor4D<KernelShape, float>& weights);
-
-// Fill the tensor with random values
-void Randomise(Tensor4D<Tensor4DShape, float>& tensor, const int seed=0);
-void Randomise(Tensor4D<KernelShape, float>& weights, const int seed=0);
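The comparison criterion documented above is relative error against the first tensor. A scalar sketch of what the `CmpTensors` prototype describes, with a hypothetical name and flat arrays in place of `Tensor4D` (not the library's implementation):

    #include <cmath>

    // Count elements whose relative error against the reference exceeds max_err.
    inline int count_mismatches(const float *a, const float *b, int n, float max_err)
    {
        int count = 0;
        for (int i = 0; i < n; i++)
        {
            if (std::fabs(a[i] - b[i]) / std::fabs(a[i]) > max_err)
                count++;
        }
        return count;
    }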
diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
deleted file mode 100644
index 99b2282f7e..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <limits>
-
-void PrintMatrix(const float *const m, const int M, const int N, const int row_stride);
-
-constexpr inline int iceildiv(const int a, const int b)
-{
- return (a + b - 1) / b;
-}
-
-template <typename T>
-inline T roundup(const T a, const T b)
-{
- return b * iceildiv(a, b);
-}
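These two helpers recur throughout the tiling code. For instance, covering 10 columns with tiles of 4 takes three tiles spanning 12 columns (a worked example, assuming this header is included):

    #include <cassert>

    int main()
    {
        static_assert(iceildiv(10, 4) == 3, "three tiles are needed to cover 10 columns");
        assert(roundup(10, 4) == 12);   // the covered extent is 12 columns
        return 0;
    }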
-
-template<typename T>
-struct TypeBounds
-{
- static constexpr T lower() noexcept { return std::numeric_limits<T>::has_infinity
- ? -std::numeric_limits<T>::infinity()
- : std::numeric_limits<T>::lowest(); };
- static constexpr T upper() noexcept { return std::numeric_limits<T>::has_infinity
- ? std::numeric_limits<T>::infinity()
- : std::numeric_limits<T>::max(); };
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-template<>
-struct TypeBounds<__fp16>
-{
- static constexpr __fp16 lower() noexcept { return -std::numeric_limits<float>::infinity(); };
- static constexpr __fp16 upper() noexcept { return std::numeric_limits<float>::infinity(); }
-};
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
deleted file mode 100644
index a4a833d90a..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <arm_neon.h>
-#include "activation.hpp"
-#include "padding.hpp"
-
-namespace depthwise
-{
-
-namespace nck = neon_convolution_kernels;
-
-class IDepthwiseConvolution
-{
- public:
- virtual ~IDepthwiseConvolution() = default;
-
- virtual int output_size(
- int dim_size,
- unsigned int padding_before,
- unsigned int padding_after
- ) const = 0;
-
- /* Set input tensor and stride. */
- virtual void set_input(const void *inptr) = 0;
- virtual void set_input(const void *inptr, int column_stride) = 0;
- virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0;
- virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0;
-
- /* Set output tensor and stride. */
- virtual void set_output(void *outptr) = 0;
- virtual void set_output(void *outptr, int column_stride) = 0;
- virtual void set_output(void *outptr, int row_stride, int column_stride) = 0;
- virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0;
-
- /* Weights and biases are re-ordered to improve memory access patterns. Use
- * these methods to determine the size of the re-pack buffer and to set its
- * address; setting the buffer implicitly reorders the weights and biases
- * into it.
- */
- virtual size_t get_packed_params_size(void) const = 0;
- virtual void set_packed_params_buffer(void *) = 0;
-
- virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0;
- virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0;
- virtual void pack_params(
- void *buffer,
- const void* weights,
- unsigned int weight_row_stride,
- unsigned int weight_col_stride,
- const void *biases=nullptr
- ) const = 0;
-
- /* Working space is used to pad tensors on the fly. Before running any
- * inference, check the amount of space required, allocate it, and provide
- * a pointer to the convolution engine.
- */
- virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0;
- virtual void set_working_space(void *) = 0;
-
- virtual unsigned int get_window(void) const = 0;
- virtual void run(
- unsigned int start,
- unsigned int stop,
- unsigned int threadid=0
- ) = 0;
-};
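Taken together, the interface implies a driver sequence: pack the parameters once, point the engine at the tensors, provide working space, then split the window of work across threads. A single-threaded sketch under that reading (the function and buffer names are illustrative, and the pack/set ordering is an assumption):

    #include <cstdlib>

    // Assumes the depthwise.hpp declarations above are in scope.
    void run_depthwise(depthwise::IDepthwiseConvolution &conv,
                       const void *weights, const void *biases,
                       const void *input, void *output)
    {
        // Re-pack weights and biases into the kernels' preferred layout.
        void *packed = std::malloc(conv.get_packed_params_size());
        conv.pack_params(packed, weights, biases);
        conv.set_packed_params_buffer(packed);

        // Scratch space for on-the-fly padding; one thread in this sketch.
        void *scratch = std::malloc(conv.get_working_space_size(1));
        conv.set_working_space(scratch);

        conv.set_input(input);
        conv.set_output(output);

        // Execute the whole window of work on the calling thread (threadid 0).
        conv.run(0, conv.get_window(), 0);

        std::free(scratch);
        std::free(packed);
    }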
-
-template <
- unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols,
- typename TIn, typename TBias, typename TOut,
- typename Derived
->
-class DepthwiseConvolutionBase : public IDepthwiseConvolution
-{
- public:
- // Information about the specific convolution instance
- using InputType = TIn;
- using BiasType = TBias;
- using OutputType = TOut;
- static constexpr int output_tile_rows = OutputTileRows;
- static constexpr int output_tile_cols = OutputTileCols;
- static constexpr int kernel_rows = KernelRows;
- static constexpr int kernel_cols = KernelCols;
- static constexpr int stride_rows = StrideRows;
- static constexpr int stride_cols = StrideCols;
- static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows;
- static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols;
-
- /** Create a new depthwise convolution engine.
- *
- * @param[in] n_batches Number of batches in the tensors.
- * @param[in] n_input_rows Number of rows in input tensor.
- * @param[in] n_input_cols Number of columns in input tensor.
- * @param[in] n_channels Number of channels in input and output tensors.
- */
- DepthwiseConvolutionBase(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- /** Create a new depthwise convolution engine.
- *
- * @param[in] n_batches Number of batches in the tensors.
- * @param[in] n_input_rows Number of rows in input tensor.
- * @param[in] n_input_cols Number of columns in input tensor.
- * @param[in] n_channels Number of channels in input and output tensors.
- * @param[in] n_output_rows Number of rows in output tensor.
- * @param[in] n_output_cols Number of columns in output tensor.
- */
- DepthwiseConvolutionBase(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- // Cannot copy or move a DepthwiseConvolution.
- DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete;
- DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete;
-
- /* Set input tensor and stride. */
- void set_input(const void *inptr) override;
- void set_input(const void *inptr, int column_stride) override;
- void set_input(const void *inptr, int row_stride, int column_stride) override;
- void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
-
- /* Set output tensor and stride. */
- void set_output(void *outptr) override;
- void set_output(void *outptr, int column_stride) override;
- void set_output(void *outptr, int row_stride, int column_stride) override;
- void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
-
- /** Get the number of output rows/columns.
- *
- * @param[in] dim_size Number of elements in the dimension (rows/columns).
- * @param[in] padding_before Padding applied before the dimension (top/left).
- * @param[in] padding_after Padding applied after the dimension (bottom/right).
- */
- static int get_output_size(
- int dim_size, unsigned int padding_before, unsigned int padding_after
- );
-
- int output_size(
- int dim_size, unsigned int padding_before, unsigned int padding_after
- ) const override;
-
- /* Determine how much memory is required to store the packed weights and
- * biases.
- */
- size_t get_packed_params_size(void) const override;
-
- /* Set the buffer for the packed weights and biases, and perform the
- * packing.
- */
- void set_packed_params_buffer(void *buffer) override;
-
- void pack_params(const void *weights, const void *biases=nullptr) const override;
-
- void pack_params(
- void *buffer,
- const void *weights,
- const void *biases=nullptr
- ) const override;
-
- void pack_params(
- void *buffer,
- const void *weights,
- unsigned int weight_row_stride,
- unsigned int weight_col_stride,
- const void *biases=nullptr
- ) const override;
-
- /** Query the amount of working space required.
- * @param[in] n_threads The largest number of threads which will be used
- * to execute the kernel.
- */
- size_t get_working_space_size(unsigned int n_threads=1) const override;
-
- /** Set the working space buffer.
- */
- void set_working_space(void *buffer) override;
-
- /** Get the window of work to be performed by an instance of the operator.
- */
- unsigned int get_window(void) const override;
-
- /** Perform a portion of the work associated with the operator.
- *
- * Will perform the window of work described by [start, stop).
- *
- * @param[in] start Start of the window of work to perform.
- * @param[in] stop End of the window of work to perform.
- * @param[in] threadid ID of the thread performing the work.
- */
- void run(
- unsigned int start,
- unsigned int stop,
- unsigned int threadid=0
- ) override;
-
- protected:
- /** Get the value to use to pad the tensor.
- */
- TIn _input_padding_value(void) const;
-
- /** Implementation of the parameter packing.
- */
- void _pack_params(
- void *buffer,
- const void *weights,
- unsigned int weight_row_stride,
- unsigned int weight_col_stride,
- const void *biases=nullptr
- ) const;
-
- /** Process a tile-row of the tensors.
- */
- void process_tile_row(
- unsigned int threadid,
- int n_channels,
- const void* packed_params,
- const InputType* inptr,
- OutputType* outptr,
- int row_pad_in_top,
- int row_pad_in_left,
- int row_pad_in_bottom,
- int row_pad_out_bottom,
- int n_tiles,
- int n_input_cols,
- int n_output_cols
- );
-
- /** Process a single tile of the tensor.
- *
- * This method will apply input/output padding (if required) and call the
- * depthwise tile implementation.
- */
- void process_tile(
- unsigned int threadid,
- int n_channels,
- const void* packed_params,
- const InputType* inptr,
- OutputType* outptr,
- int pad_in_top,
- int pad_in_left,
- int pad_in_bottom,
- int pad_in_right,
- int pad_out_bottom,
- int pad_out_right
- );
-
- /** Perform depthwise convolution on a single tile.
- */
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const InputType* inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- OutputType* outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride
- );
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const InputType* inptrs[inner_tile_rows][inner_tile_cols],
- OutputType* outptrs[output_tile_rows][output_tile_cols]
- );
-
- int n_channels(void) const;
-
- private:
- // Member variables of instances of a convolution engine.
- const InputType* _input;
- OutputType* _output;
- void* _packed_parameters;
- void* _working_space; // Per-thread working space
- const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
- _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
- const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right;
- const nck::ActivationFunction _activation;
-
- // Stride information for a convolution instance
- int _input_col_stride, _input_row_stride, _input_batch_stride;
- int _output_col_stride, _output_row_stride, _output_batch_stride;
-
- // Methods for getting access to working space
- size_t _get_input_working_space_size(void) const;
- size_t _get_output_working_space_size(void) const;
-
- void *_get_input_working_space(unsigned int threadid) const;
- void *_get_output_working_space(unsigned int threadid) const;
-};
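The inner-tile constants above follow directly from the stride arithmetic: an output tile of O elements at stride S under a K-wide kernel reads S*(O-1)+K input elements along that axis. A compile-time check of the formula itself (plain arithmetic, independent of the class template):

    // inner_tile = stride * (output_tile - 1) + kernel, e.g. for a 2x2 output
    // tile under a 3x3 kernel:
    static_assert(1 * (2 - 1) + 3 == 4, "stride 1: each tile reads a 4x4 input patch");
    static_assert(2 * (2 - 1) + 3 == 5, "stride 2: each tile reads a 5x5 input patch");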
-
-
-template <
- unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols,
- typename TIn, typename TBias, typename TOut
->
-class DepthwiseConvolution : public DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- TIn, TBias, TOut,
- DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- TIn, TBias, TOut
- >
->
-{
- using Base = DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- TIn, TBias, TOut,
- DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- TIn, TBias, TOut
- > >;
- friend Base;
- using InputType = typename Base::InputType;
- using OutputType = typename Base::OutputType;
-
- public:
- using Base::DepthwiseConvolutionBase;
-
- protected:
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const TIn* inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- TOut* outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride
- );
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
- OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols]
- );
-};
-
-
-template <
- unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols
->
-class DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float, float, float
-> : public DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float, float, float,
- DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float, float, float
- >
->
-{
- using Base = DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float, float, float,
- DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float, float, float
- > >;
- friend Base;
- using InputType = typename Base::InputType;
- using OutputType = typename Base::OutputType;
-
- public:
- DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- protected:
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const float* inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- float* outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride
- );
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
- float* outptrs[Base::output_tile_rows][Base::output_tile_cols]
- );
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-template <
- unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols
->
-class DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float16_t, float16_t, float16_t
-> : public DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float16_t, float16_t, float16_t,
- DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float16_t, float16_t, float16_t
- >
->
-{
- using Base = DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float16_t, float16_t, float16_t,
- DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- float16_t, float16_t, float16_t
- > >;
- friend Base;
- using InputType = typename Base::InputType;
- using OutputType = typename Base::OutputType;
-
- public:
- DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- protected:
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const float16_t* inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- float16_t* outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride
- );
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
- float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
- );
-};
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-} // namespace depthwise
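
The engines declared above expose a pack/allocate/run workflow through IDepthwiseConvolution. A minimal sketch of that sequence (the 4x4-tile/3x3-kernel/stride-1 fp32 instantiation, the ReLU enumerator, and all sizes are illustrative assumptions, not taken from this header):

    #include "depthwise.hpp"

    #include <cstdint>
    #include <vector>

    void run_depthwise_fp32(const float *weights, const float *biases,
                            const float *input, float *output)
    {
      // Assumed instantiation: 4x4 output tile, 3x3 kernel, stride 1, fp32.
      depthwise::DepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float, float> conv(
          1, 56, 56, 64,  // batches, input rows, cols, channels (illustrative)
          neon_convolution_kernels::ActivationFunction::ReLU,  // assumed enumerator
          1, 1, 1, 1);    // top/left/bottom/right padding

      // Weights and biases are re-ordered into a packed buffer first.
      std::vector<uint8_t> packed(conv.get_packed_params_size());
      conv.set_packed_params_buffer(packed.data());
      conv.pack_params(weights, biases);

      // Working space is used to pad tensors on the fly.
      std::vector<uint8_t> scratch(conv.get_working_space_size(1));
      conv.set_working_space(scratch.data());

      conv.set_input(input);
      conv.set_output(output);
      conv.run(0, conv.get_window());  // one thread processes the whole window
    }
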
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp
deleted file mode 100644
index e0d7f0c7f1..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <deque>
-#include <functional>
-#include <memory>
-
-#include "depthwise.hpp"
-
-namespace depthwise
-{
-
-template <
- unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols,
- typename TIn, typename TBias, typename TOut
->
-class DilatedDepthwiseConvolution : public IDepthwiseConvolution
-{
- public:
- /** Create a new dilated depthwise convolution engine.
- */
- DilatedDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int dilation_factor,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- /** Create a new dilated depthwise convolution engine.
- */
- DilatedDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int dilation_factor, int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- // Cannot copy or move a DilatedDepthwiseConvolution.
- DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete;
- DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete;
-
- /* Set input tensor and stride. */
- void set_input(const void *inptr) override;
- void set_input(const void *inptr, int column_stride) override;
- void set_input(const void *inptr, int row_stride, int column_stride) override;
- void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
-
- /* Set output tensor and stride. */
- void set_output(void *outptr) override;
- void set_output(void *outptr, int column_stride) override;
- void set_output(void *outptr, int row_stride, int column_stride) override;
- void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
-
- static int get_output_size(
- int dim_size,
- unsigned int padding_before,
- unsigned int padding_after,
- int dilation_factor
- );
-
- int output_size(
- int dim_size, unsigned int padding_before, unsigned int padding_after
- ) const override;
-
- /* Weights and biases are re-ordered to improve memory access patterns. Use
- * these methods to determine the size of the re-pack buffer and to set the
- * address (and implicitly reorder the weights and biases into) the buffer.
- */
- size_t get_packed_params_size(void) const override;
- void set_packed_params_buffer(void *) override;
-
- void pack_params(const void *weights, const void *biases=nullptr) const override;
- void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override;
- void pack_params(
- void *buffer,
- const void* weights,
- unsigned int weight_row_stride,
- unsigned int weight_col_stride,
- const void *biases=nullptr
- ) const override;
-
- /* Working space is used to pad tensors on the fly. Before running any
- * inference check the amount of space required, allocate and provide a
- * pointer to the convolution engine.
- */
- size_t get_working_space_size(unsigned int nthreads=1) const override;
- void set_working_space(void *) override;
-
- unsigned int get_window(void) const override;
- void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
-
- protected:
- /** Protected constructor which also accepts a function to construct a new
- * subconvolution
- */
- DilatedDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int dilation_factor, int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right,
- std::function<IDepthwiseConvolution *(int, int, int, int, int, int, nck::ActivationFunction, unsigned int, unsigned int, unsigned int, unsigned int)> subconvfn
- );
-
- const int _dilation_factor;
- const int _n_input_rows, _n_input_cols, _n_channels;
- const int _padding_top, _padding_left;
- const int _n_output_rows, _n_output_cols;
-
-  /* Dilated depthwise convolution is performed through repeated calls to
-   * non-dilated convolutions. If the dilation factor is $n$, then we perform
-   * $n^2$ depthwise convolutions: one for each of the $n \times n$ interleaved
-   * sub-images of the input.
-   */
- using BaseDepthwise = DepthwiseConvolution<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- TIn, TBias, TOut
- >;
- std::deque<std::deque<std::unique_ptr<IDepthwiseConvolution>>> _convs;
-};
-
-} // namespace depthwise
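
Since a dilated convolution with factor n decomposes into n^2 ordinary depthwise convolutions over interleaved sub-images, the dilated engine is driven the same way as the plain one; only construction differs. A sketch (the instantiation and all sizes are illustrative assumptions):

    #include "depthwise_dilated.hpp"

    void build_dilated_fp32()
    {
      using Conv = depthwise::DilatedDepthwiseConvolution<
          4, 4, 3, 3, 1, 1, float, float, float>;

      // Static helper declared above: output extent along one dimension.
      const int out_rows = Conv::get_output_size(
          56 /* dim_size */, 1 /* pad before */, 1 /* pad after */, 2 /* dilation */);
      (void)out_rows;

      Conv conv(1, 56, 56, 64,  // batches, input rows, cols, channels
                2,              // dilation factor: 2 x 2 = 4 sub-convolutions
                neon_convolution_kernels::ActivationFunction::None,
                1, 1, 1, 1);    // padding
      // From here the pack_params / set_working_space / set_input /
      // set_output / run sequence shown earlier applies unchanged.
    }
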
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp
deleted file mode 100644
index 37c1f1bc84..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include "depthwise.hpp"
-#include "qasymm8.hpp"
-#include "qsymm8.hpp"
-
-using namespace neon_convolution_kernels;
-using namespace qasymm8;
-
-inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b)
-{
- return vqrdmulhq_s32(a, b);
-}
-
-inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b)
-{
- return vqrdmulhq_n_s32(a, b);
-}
-
-inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b)
-{
- return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0);
-}
-
-inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift)
-{
- const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31);
- const int32x4_t fixed = vqaddq_s32(x, fixup);
- return vrshlq_s32(fixed, shift);
-}
-
-inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent)
-{
- const int32x4_t shift = vdupq_n_s32(-exponent);
- const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31);
- const int32x4_t fixed = vqaddq_s32(x, fixup);
- return vrshlq_s32(fixed, shift);
-}
-
-inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent)
-{
- const int32x2_t shift = vdup_n_s32(-exponent);
- const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31);
- const int32x2_t fixed = vqadd_s32(x, fixup);
- return vrshl_s32(fixed, shift);
-}
-
-inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent)
-{
- const int32x2_t xs = vdup_n_s32(x);
- return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0);
-}
-
-namespace depthwise
-{
-
-namespace nck = neon_convolution_kernels;
-
-template <
- unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols
->
-class QAsymm8DepthwiseConvolution : public DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- uint8_t, int32_t, uint8_t,
- QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
->
-{
- using Base = DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- uint8_t, int32_t, uint8_t,
- QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
- >;
- friend Base;
- using InputType = typename Base::InputType;
- using OutputType = typename Base::OutputType;
-
- public:
- QAsymm8DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params& weight_quantisation,
- const qasymm8::QAsymm8Params& input_quantisation,
- const qasymm8::QAsymm8Params& output_quantisation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- QAsymm8DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params& weight_quantisation,
- const qasymm8::QAsymm8Params& input_quantisation,
- const qasymm8::QAsymm8Params& output_quantisation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- QAsymm8DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params& weight_quantisation,
- const qasymm8::QAsymm8Params& input_quantisation,
- const qasymm8::QAsymm8Params& output_quantisation,
- const qasymm8::QAsymm8RescaleParams& rescale_parameters,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- QAsymm8DepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params& weight_quantisation,
- const qasymm8::QAsymm8Params& input_quantisation,
- const qasymm8::QAsymm8Params& output_quantisation,
- const qasymm8::QAsymm8RescaleParams& rescale_parameters,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- protected:
- uint8_t _input_padding_value(void) const;
-
- void _pack_params(
- void *buffer,
- const void *weights,
- unsigned int weight_row_stride,
- unsigned int weight_col_stride,
- const void *biases=nullptr
- ) const;
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const uint8_t* inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- uint8_t* outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride
- );
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
- uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
- );
-
- private:
- // Quantization parameters
- const qasymm8::QAsymm8Params _weights_quant, _inputs_quant, _output_quant;
- const qasymm8::QAsymm8RescaleParams rescale_parameters;
-};
-
-template <
- unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols
->
-class QSymm8HybridPerChannelDepthwiseConvolution : public DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- uint8_t, int32_t, uint8_t,
- QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
->
-{
- using Base = DepthwiseConvolutionBase<
- OutputTileRows, OutputTileCols,
- KernelRows, KernelCols,
- StrideRows, StrideCols,
- uint8_t, int32_t, uint8_t,
- QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
- >;
- friend Base;
- using InputType = typename Base::InputType;
- using OutputType = typename Base::OutputType;
-
- public:
- QSymm8HybridPerChannelDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- nck::ActivationFunction activation,
- const qsymm8::QSymm8PerChannelParams& weight_quantisation,
- const qasymm8::QAsymm8Params& input_quantisation,
- const qasymm8::QAsymm8Params& output_quantisation,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- QSymm8HybridPerChannelDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- nck::ActivationFunction activation,
- const qsymm8::QSymm8PerChannelParams& weight_quantisation,
- const qasymm8::QAsymm8Params& input_quantisation,
- const qasymm8::QAsymm8Params& output_quantisation,
- const qsymm8::QSymm8PerChannelRescaleParams& rescale_parameters,
- unsigned int padding_top,
- unsigned int padding_left,
- unsigned int padding_bottom,
- unsigned int padding_right
- );
-
- size_t get_packed_params_size(void) const override
- {
-    return this->n_channels() * (sizeof(int8_t)*KernelRows*KernelCols + 3*sizeof(int32_t));
-  }
-
- protected:
- uint8_t _input_padding_value(void) const;
-
- void _pack_params(
- void *buffer,
- const void *weights,
- unsigned int weight_row_stride,
- unsigned int weight_col_stride,
- const void *biases=nullptr
- ) const;
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const uint8_t* inptr,
- unsigned int in_row_stride,
- unsigned int in_col_stride,
- uint8_t* outptr,
- unsigned int out_row_stride,
- unsigned int out_col_stride
- );
-
- template <nck::ActivationFunction Activation>
- void execute_tile(
- int n_channels,
- const void* packed_params,
- const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
- uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
- );
-
- private:
- // Quantization parameters
- const qsymm8::QSymm8PerChannelParams _weights_quant;
- const qasymm8::QAsymm8Params _input_quant, _output_quant;
- const qsymm8::QSymm8PerChannelRescaleParams _rescale_parameters;
-};
-
-} // namespace depthwise
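
The free functions at the top of this header are the standard fixed-point requantisation primitives: a saturating doubling high multiply followed by a rounding divide by a power of two. On an AArch64 target they compose into a scalar requantisation step like the sketch below (the helper name and the quantisation constants are illustrative; only the two primitives come from the header):

    #include "depthwise_quantized.hpp"

    #include <algorithm>
    #include <cstdint>

    // Rescale an int32 accumulator into the uint8 output domain: fixed-point
    // multiply, rounding divide by 2^shift, then re-centre on the zero point.
    uint8_t requantise(int32_t acc, int32_t multiplier, int shift, int32_t offset)
    {
      const int32_t scaled =
          rounding_divide_by_exp2(saturating_doubling_high_mul(acc, multiplier), shift);
      return static_cast<uint8_t>(std::min(255, std::max(0, scaled + offset)));
    }
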
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp
deleted file mode 100644
index cf1c6f581f..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "depthwise_dilated.hpp"
-#include "depthwise_quantized.hpp"
-
-namespace depthwise {
-
-template <unsigned int OutputTileRows, unsigned int OutputTileCols,
- unsigned int KernelRows, unsigned int KernelCols,
- unsigned int StrideRows, unsigned int StrideCols>
-class QAsymm8DilatedDepthwiseConvolution
- : public DilatedDepthwiseConvolution<
- OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows,
- StrideCols, uint8_t, int32_t, uint8_t> {
-public:
- /** Create a new dilated depthwise convolution engine.
- */
- QAsymm8DilatedDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int dilation_factor, nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params &weight_quantisation,
- const qasymm8::QAsymm8Params &input_quantisation,
- const qasymm8::QAsymm8Params &output_quantisation,
- unsigned int padding_top, unsigned int padding_left,
- unsigned int padding_bottom, unsigned int padding_right);
-
- /** Create a new dilated depthwise convolution engine.
- */
- QAsymm8DilatedDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int dilation_factor, int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params &weight_quantisation,
- const qasymm8::QAsymm8Params &input_quantisation,
- const qasymm8::QAsymm8Params &output_quantisation,
- unsigned int padding_top, unsigned int padding_left,
- unsigned int padding_bottom, unsigned int padding_right);
-
- /** Create a new dilated depthwise convolution engine.
- */
- QAsymm8DilatedDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int dilation_factor, nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params &weight_quantisation,
- const qasymm8::QAsymm8Params &input_quantisation,
- const qasymm8::QAsymm8Params &output_quantisation,
- const qasymm8::QAsymm8RescaleParams &rescale_parameters,
- unsigned int padding_top, unsigned int padding_left,
- unsigned int padding_bottom, unsigned int padding_right);
-
- /** Create a new dilated depthwise convolution engine.
- */
- QAsymm8DilatedDepthwiseConvolution(
- int n_batches, int n_input_rows, int n_input_cols, int n_channels,
- int dilation_factor, int n_output_rows, int n_output_cols,
- nck::ActivationFunction activation,
- const qasymm8::QAsymm8Params &weight_quantisation,
- const qasymm8::QAsymm8Params &input_quantisation,
- const qasymm8::QAsymm8Params &output_quantisation,
- const qasymm8::QAsymm8RescaleParams& rescale_parameters,
- unsigned int padding_top, unsigned int padding_left,
- unsigned int padding_bottom, unsigned int padding_right);
-};
-
-} // namespace depthwise
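
Construction of the quantised dilated engine only adds the quantisation descriptors to the plain dilated signature. A sketch (the 2x2/3x3/stride-1 instantiation, the scale/offset field names of qasymm8::QAsymm8Params, and all values are assumptions to be checked against qasymm8.hpp):

    #include "depthwise_quantized_dilated.hpp"

    void build_qasymm8_dilated()
    {
      qasymm8::QAsymm8Params weight_q, input_q, output_q;
      weight_q.scale = 0.02f; weight_q.offset = 128;  // assumed field names
      input_q.scale  = 0.5f;  input_q.offset  = 0;
      output_q.scale = 0.25f; output_q.offset = 0;

      depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 1, 1> conv(
          1, 56, 56, 64,  // batches, input rows, cols, channels
          2,              // dilation factor
          neon_convolution_kernels::ActivationFunction::None,
          weight_q, input_q, output_q,
          1, 1, 1, 1);    // padding
    }
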
diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/winograd.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/winograd.hpp
deleted file mode 100644
index bc0d9d4296..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/winograd/winograd.hpp
+++ /dev/null
@@ -1,621 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp"
-
-#include <cstddef>
-#include <utility>
-
-namespace winograd
-{
-
-class ITransform
-{
- public:
- virtual ~ITransform() = default;
-
- /**
- * Get the working space required to perform the transformation.
- *
- * Note, the working space is only required when performing the
- * transformation - hence it can be reused whenever the transformation is
- * not running.
- *
- * @param nthreads The greatest number of threads that will be used to execute the transform.
- * @return Size of working space required in bytes.
- */
- virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0;
-
- /**
- * Set the working space to be used by the transformation.
- *
- * Note, the working space is only required when performing the
- * transformation - hence it can be reused whenever the transformation is
- * not running.
- *
-   * @param buffer Pointer to the working space.
- */
- virtual void set_working_space(void *buffer) = 0;
-
- /**
- * Get the window of work a given operator can perform.
- */
- virtual unsigned int get_window() const = 0;
-
- /**
- * Perform work upon a window of the transform.
- */
- virtual void run(unsigned int start, unsigned int stop, unsigned int threadid=0) = 0;
-};
-
-class IInputTransform : public ITransform
-{
- public:
- virtual ~IInputTransform() = default;
-
- /**
- * Set the pointer to the (NHWC-ordered) tensor to be transformed.
- */
- virtual void set_input_tensor(const void *input) = 0;
-
- /**
- * Set the pointer to the (NHWC-ordered) tensor to be transformed.
- * @param col_stride Stride between columns of the tensor, measured in elements (not bytes).
- */
- virtual void set_input_tensor(const void *input, int col_stride) = 0;
-
- /**
- * Set the pointer to the (NHWC-ordered) tensor to be transformed.
- * @param row_stride Stride between rows of the tensor, measured in elements (not bytes).
- * @param col_stride Stride between columns of the tensor, measured in elements (not bytes).
- */
- virtual void set_input_tensor(const void *input, int row_stride, int col_stride) = 0;
-
- /**
- * Set the pointer to the (NHWC-ordered) tensor to be transformed.
- * @param batch_stride Stride between batches of the tensor, measured in elements (not bytes).
- * @param row_stride Stride between rows of the tensor, measured in elements (not bytes).
- * @param col_stride Stride between columns of the tensor, measured in elements (not bytes).
- */
- virtual void set_input_tensor(const void *input, int batch_stride, int row_stride, int col_stride) = 0;
-
- /**
- * Set pointers to the matrices written by the transform.
- * @param matrices Pointer to the start of the first matrix representing the transformed input.
- * @param inter_matrix_stride Stride (in elements) between matrices.
- * @param matrix_row_stride Stride (in elements) between the rows within a single matrix.
- */
- virtual void set_output_matrices(void *matrices, int inter_matrix_stride, int matrix_row_stride) = 0;
-};
-
-class IOutputTransform : public ITransform
-{
- public:
- virtual ~IOutputTransform() = default;
-
- /**
- * Set pointers to the matrices written by the transform.
- * @param matrices Pointer to the start of the first matrix representing the input to the transform.
- * @param inter_matrix_stride Stride (in elements) between matrices.
- * @param matrix_row_stride Stride (in elements) between the rows within a single matrix.
- */
- virtual void set_input_matrices(const void *matrices, int inter_matrix_stride, int matrix_row_stride) = 0;
-
- /**
-   * Set pointer to the bias tensor (can be ignored, or called with nullptr for no bias).
- */
- virtual void set_bias(const void *bias=nullptr) = 0;
-
- /**
- * Set pointer to the output tensor produced by the transform.
- */
- virtual void set_output_tensor(void *output) = 0;
-
- /**
- * Set pointer to the output tensor produced by the transform.
- * @param col_stride Stride between columns of the tensor, measured in elements (not bytes).
- */
- virtual void set_output_tensor(void *output, int col_stride) = 0;
-
- /**
- * Set pointer to the output tensor produced by the transform.
- * @param row_stride Stride between rows of the tensor, measured in elements (not bytes).
- * @param col_stride Stride between columns of the tensor, measured in elements (not bytes).
- */
- virtual void set_output_tensor(void *output, int row_stride, int col_stride) = 0;
-
- /**
- * Set pointer to the output tensor produced by the transform.
- * @param batch_stride Stride between batches of the tensor, measured in elements (not bytes).
- * @param row_stride Stride between rows of the tensor, measured in elements (not bytes).
- * @param col_stride Stride between columns of the tensor, measured in elements (not bytes).
- */
- virtual void set_output_tensor(void *output, int batch_stride, int row_stride, int col_stride) = 0;
-};
-
-class IWeightTransform : public ITransform
-{
- public:
- virtual ~IWeightTransform() = default;
-
- /** Set pointer to the weight tensor read by the transform. */
- virtual void set_weight_tensor(const void *weights) = 0;
-
- /**
- * Set pointers to the matrices written by the transform.
- * @param matrices Pointer to the start of the first matrix representing the transformed input.
- * @param inter_matrix_stride Stride (in elements) between matrices.
- * @param matrix_row_stride Stride (in elements) between the rows within a single matrix.
- */
- virtual void set_output_matrices(void *matrices, int inter_matrix_stride, int matrix_row_stride) = 0;
-};
-
-enum class WinogradRoots
-{
- Integers,
-};
-
-template <int InnerTileRows, int InnerTileCols, typename TIn, typename TOut, WinogradRoots Roots>
-class InputTransform : public IInputTransform
-{
- public:
- /** Create an InputTransform operator fixed on a given problem and set of
- * pointers.
- */
- InputTransform(
- int kernel_rows, /**< Number of rows in the kernel */
- int kernel_cols, /**< Number of columns in the kernel */
- int n_batches, /**< Number of batches in input tensor. */
- int n_rows, /**< Number of rows in input tensor. */
- int n_cols, /**< Number of columns in input tensor. */
- int n_channels, /**< Number of channels in input tensor. */
- int padding_top, /**< Padding to apply to the top of the image. */
- int padding_left, /**< Padding to apply to the left of the image. */
- int padding_bottom, /**< Padding to apply to the bottom of the image. */
- int padding_right /**< Padding to apply to the right of the image. */
- );
-
- InputTransform(InputTransform&) = delete;
- InputTransform operator=(InputTransform&) = delete;
-
- /** Set pointers to the input tensor read by the transform. */
- void set_input_tensor(const void *input) override;
- void set_input_tensor(const void *input, int col_stride) override;
- void set_input_tensor(const void *input, int row_stride, int col_stride) override;
- void set_input_tensor(const void *input, int batch_stride, int row_stride, int col_stride) override;
-
- /** Set pointers to the matrices written by the transform. */
-  void set_output_matrices(void *matrices, int inter_matrix_stride, int matrix_row_stride) override;
-
- /** Get the working space required to perform the transformation. */
- size_t get_working_space_size(unsigned int nthreads=1) const override;
- void set_working_space(void *buffer) override;
-
- /** Get the window of work a given operator can perform. */
- unsigned int get_window() const override;
- static constexpr unsigned int WINDOW_BLOCK = 16; // Base size of window
-
- /** Perform work upon a window of the input. */
- void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
-
- protected:
- const int _n_batches, _n_rows, _n_cols, _n_channels;
-
- private:
- void transform_unpadded_tile(
- unsigned int threadid,
- int n_channels,
- TOut *outptr,
- const TIn *inptr
- );
-
- void transform_padded_tile(
- unsigned int threadid,
- int n_channels,
- TOut *outptr,
- const TIn *inptr,
- int padding_top,
- int padding_left,
- int padding_bottom,
- int padding_right
- );
-
- /* Tile implementation */
- static void transform_tile(
-    int n_channels,         /**< Number of channels in the tensor. */
-    const TIn* inptr_base,  /**< Pointer to the base of the input tile. */
-    int input_row_stride,   /**< Stride between rows of the input tensor. */
-    int input_col_stride,   /**< Stride between columns of the input tensor. */
-    TOut* mptr_base,        /**< Base pointer to transformed input matrices. */
-    int matrix_stride       /**< Stride between matrices in the input space. */
-
- /** Get the working space for a thread. */
- void * get_working_space(unsigned int threadid) const;
-
- const TIn* _inptr;
- TOut* _outptr;
-
- const int _overlap_rows, _overlap_cols;
- const int _padding_top, _padding_left, _padding_bottom, _padding_right;
- const int _tiles_M, _tiles_N;
- int _matrix_stride, _matrix_row_stride, _matrix_batch_stride;
- int _in_col_stride, _in_row_stride, _in_batch_stride;
-
- const int _working_space_col_stride, _working_space_row_stride;
- TIn *_working_space;
-};
-
-template <int InnerTileRows, typename TIn, typename TOut, WinogradRoots Roots>
-class InputTransform<InnerTileRows, 1, TIn, TOut, Roots> :
- public InputTransform<1, InnerTileRows, TIn, TOut, Roots>
-{
- using Base = InputTransform<1, InnerTileRows, TIn, TOut, Roots>;
-
- public:
- InputTransform(
- int kernel_rows, /**< Number of rows in the kernel. */
- int kernel_cols, /**< Number of columns in the kernel. */
- int n_batches, /**< Number of batches in input tensor. */
- int n_rows, /**< Number of rows in input tensor. */
- int n_cols, /**< Number of columns in input tensor. */
- int n_channels, /**< Number of channels in input tensor. */
- int padding_top, /**< Padding to apply to the top of the image. */
- int padding_left, /**< Padding to apply to the left of the image. */
- int padding_bottom, /**< Padding to apply to the bottom of the image. */
- int padding_right /**< Padding to apply to the right of the image. */
- );
-
- /** Set pointers to the input tensor read by the transform. */
- void set_input_tensor(const void *input) override;
- void set_input_tensor(const void *input, int col_stride) override;
- void set_input_tensor(const void *input, int row_stride, int col_stride) override;
- void set_input_tensor(const void *input, int batch_stride, int row_stride, int col_stride) override;
-};
-
-template <
- int KernelRows, int KernelCols,
- int InnerTileRows, int InnerTileCols,
- typename TIn, typename TOut,
- WinogradRoots Roots
->
-class OutputTransform : public IOutputTransform
-{
- public:
- OutputTransform(
- int n_batches, /**< Number of batches in output tensor. */
- int n_rows, /**< Number of rows in output tensor. */
- int n_cols, /**< Number of columns in output tensor. */
- int n_channels, /**< Number of channels in output tensor. */
- const arm_gemm::Activation &activation
- );
-
- OutputTransform(OutputTransform&) = delete;
- OutputTransform operator=(OutputTransform&) = delete;
-
- /** Set pointers to the matrices read by the transform. */
-  void set_input_matrices(const void *matrices, int inter_matrix_stride, int matrix_row_stride) override;
-
-  /** Set pointer to the bias tensor (can be ignored, or called with nullptr for no bias). */
- void set_bias(const void *bias=nullptr) override;
-
- /** Set pointers to the output tensor written by the transform. */
- void set_output_tensor(void *output) override;
- void set_output_tensor(void *output, int col_stride) override;
- void set_output_tensor(void *output, int row_stride, int col_stride) override;
- void set_output_tensor(void *output, int batch_stride, int row_stride, int col_stride) override;
-
- /** Get the working space required to perform the transformation. */
- size_t get_working_space_size(unsigned int nthreads=1) const override;
- void set_working_space(void *buffer) override;
-
- /** Get the window of work a given operator can perform. */
- unsigned int get_window() const override;
- static constexpr unsigned int WINDOW_BLOCK = 16; // Base size of window
-
- /** Perform work upon a window of the input. */
- void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
-
- protected:
- static constexpr int inner_tile_rows = InnerTileRows;
- static constexpr int inner_tile_cols = InnerTileCols;
- static constexpr int output_tile_rows = InnerTileRows - KernelRows + 1;
- static constexpr int output_tile_cols = InnerTileCols - KernelCols + 1;
-
- const int _n_batches, _n_rows, _n_cols, _n_channels;
- const TOut _output_min, _output_max;
-
- private:
- void transform_uncropped_tile(
- unsigned int threadid,
- int n_channels,
- TOut *outptr,
- const TIn *inptr,
- const TOut *biases
- );
-
- void transform_cropped_tile(
- unsigned int threadid,
- int n_channels,
- TOut *outptr,
- const TIn *inptr,
- const TOut *biases,
- int pad_bottom,
- int pad_right
- );
-
- /** Implementation of the tile transformation method. */
- static void transform_tile(
- int n_channels,
- const TIn* matrix_base,
- int matrix_stride,
- const TOut* biases,
- TOut* output,
- int output_row_stride,
- int output_col_stride,
- TOut output_min,
- TOut output_max
- );
-
- /** Get the working space for a thread. */
- void * get_working_space(unsigned int threadid) const;
-
- const TIn* _matrix_base;
- const TOut* _biases;
- int _matrix_stride, _matrix_row_stride, _matrix_batch_stride;
- TOut* _outptr;
- const int _tiles_M, _tiles_N;
- int _out_col_stride, _out_row_stride, _out_batch_stride;
-
- const int _working_space_col_stride, _working_space_row_stride;
- TOut *_working_space;
-};
-
-template <
- int KernelRows,
- int InnerTileRows,
- typename TIn, typename TOut,
- WinogradRoots Roots
->
-class OutputTransform<KernelRows, 1, InnerTileRows, 1, TIn, TOut, Roots> :
- public OutputTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots>
-{
- using Base = OutputTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots>;
-
- public:
- OutputTransform(
- int n_batches, /**< Number of batches in output tensor. */
- int n_rows, /**< Number of rows in output tensor. */
- int n_cols, /**< Number of columns in output tensor. */
- int n_channels, /**< Number of channels in output tensor. */
- const arm_gemm::Activation &activation
- );
-
- /** Set pointers to the output tensor written by the transform. */
- void set_output_tensor(void *output) override;
- void set_output_tensor(void *output, int col_stride) override;
- void set_output_tensor(void *output, int row_stride, int col_stride) override;
- void set_output_tensor(void *output, int batch_stride, int row_stride, int col_stride) override;
-};
-
-template <
- int KernelRows, int KernelCols,
- int InnerTileRows, int InnerTileCols,
- typename TIn, typename TOut,
- WinogradRoots Roots
->
-class WeightTransform : public IWeightTransform
-{
- public:
- WeightTransform(
- int n_output_channels, /**< Number of output channels in the kernel. */
- int n_input_channels /**< Number of input channels in the kernel. */
- );
-
- WeightTransform(WeightTransform&) = delete;
- WeightTransform operator=(WeightTransform&) = delete;
-
- /** Set pointer to the weight tensor read by the transform. */
- void set_weight_tensor(const void *weights) override;
-
- /** Set pointer to the matrices written by the transform. */
- void set_output_matrices(void *matrices, int inter_matrix_stride, int matrix_row_stride) override;
-
- /** Get the working space required to perform the transformation. */
- size_t get_working_space_size(unsigned int nthreads=1) const override;
- void set_working_space(void *buffer) override;
-
- /** Get the window of work a given operator can perform. */
- unsigned int get_window() const override;
- static constexpr unsigned int WINDOW_BLOCK = 16; // Base size of window
-
- /** Perform work upon a window of the input. */
- void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
-
- protected:
- static const int kernel_rows = KernelRows;
- static const int kernel_cols = KernelCols;
- static const int inner_tile_rows = InnerTileRows;
- static const int inner_tile_cols = InnerTileCols;
-
- private:
- /** Apply the transform to a tensor. */
- static void execute(
- int n_output_channels,
- int n_input_channels,
- const TIn* input,
- TOut* output,
- int matrix_stride,
- int matrix_row_stride
- );
-
- const int _n_output_channels, _n_input_channels;
- TOut *_matrices;
- int _matrix_stride, _matrix_row_stride;
- const TIn *_weights;
-};
-
-template <int KernelRows, int InnerTileRows, typename TIn, typename TOut, WinogradRoots Roots>
-class WeightTransform<KernelRows, 1, InnerTileRows, 1, TIn, TOut, Roots> :
- public WeightTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots>
-{
- public:
- using WeightTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots>::WeightTransform;
-};
-
-template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols, WinogradRoots Roots>
-class WinogradGEMM
-{
- public:
- // Information about the specific Winograd instance
- static constexpr int output_tile_rows = OutputTileRows;
- static constexpr int output_tile_cols = OutputTileCols;
- static constexpr int kernel_rows = KernelRows;
- static constexpr int kernel_cols = KernelCols;
- static constexpr int inner_tile_rows = output_tile_rows + kernel_rows - 1;
- static constexpr int inner_tile_cols = output_tile_cols + kernel_cols - 1;
- static constexpr int N_GEMMS = inner_tile_rows * inner_tile_cols;
-
- /** Transform weights from the spatial to the Winograd domain. */
- template <typename TIn, typename TOut>
- using WeightsTransform = WeightTransform<
- KernelRows, KernelCols, inner_tile_rows, inner_tile_cols,
- TIn, TOut, Roots
- >;
-
- /** Transform input feature maps from the spatial to the Winograd domain.
- */
- template <typename TIn, typename TOut>
- using InputTransform = InputTransform<
- inner_tile_rows, inner_tile_cols, TIn, TOut, Roots
- >;
-
- /** Transform output feature maps from the Winograd to the spatial domain.
- */
- template <typename TIn, typename TOut>
- using OutputTransform = OutputTransform<
- KernelRows, KernelCols, inner_tile_rows, inner_tile_cols,
- TIn, TOut, Roots
- >;
-
- /** Perform a convolution.
- */
- template <typename TOut, typename TIn, typename TInGEMM=TIn, typename TOutGEMM=TOut>
- class Convolution
- {
- public:
- // Information about the typed Winograd instance
- typedef TOut OutputType;
- typedef TOutGEMM GemmOutputType;
- typedef TInGEMM GemmInputType;
- typedef TIn InputType;
-
- /** Get the output shape of a convolution. */
- static std::pair<unsigned int, unsigned int> get_output_shape(
- const std::pair<unsigned int, unsigned int> input_shape,
- bool padding_same);
-
- /** Get the memory required to store the kernel transformed into the
- * Winograd domain.
- */
- static size_t get_kernel_storage_size(unsigned int n_input_channels,
- unsigned int n_output_channels);
-
- /** Get the memory required to store the input tensor transformed into
- * the Winograd domain.
- */
- static size_t get_input_storage_size(
- unsigned int n_batches, // Number of batches
- unsigned int n_rows, // Number of input rows
- unsigned int n_cols, // Number of input columns
- unsigned int n_channels, // Number of input channels
- bool padding_same);
-
- /** Get the memory required to store the output tensor in the Winograd
- * domain.
- */
- static size_t get_output_storage_size(
- unsigned int n_batches, // Number of batches
- unsigned int n_rows, // Number of output rows
- unsigned int n_cols, // Number of output columns
- unsigned int n_channels // Number of output channels
- );
-
- /** Get the memory required to apply a Winograd operator to some input.
- */
- static size_t get_working_space_size(
- unsigned int n_batches,
- unsigned int n_rows, // Number of input rows
- unsigned int n_cols, // Number of input columns
- unsigned int n_input_channels, // Number of input channels
- unsigned int n_output_channels, // Number of output channels
- bool padding_same);
-
- /* Get the memory required by a single "input" matrix.
- */
- static size_t get_input_matrix_size(
- unsigned int n_batches, // Number of batches
- unsigned int n_rows, // Number of input rows
- unsigned int n_cols, // Number of input columns
- unsigned int n_channels, // Number of input channels
- bool padding_same);
-
- static int get_input_matrix_stride(
- unsigned int n_batches, // Number of batches
- unsigned int n_rows, // Number of input rows
- unsigned int n_cols, // Number of input columns
- unsigned int n_channels, // Number of input channels
- bool padding_same);
-
- /* Get the memory required by a single "output" matrix.
- */
- static size_t get_output_matrix_size(
- unsigned int n_batches, // Number of batches
- unsigned int n_rows, // Number of output rows
- unsigned int n_cols, // Number of output columns
- unsigned int n_channels // Number of output channels
- );
-
- static int get_output_matrix_stride(
- unsigned int n_batches, // Number of batches
- unsigned int n_rows, // Number of output rows
- unsigned int n_cols, // Number of output columns
- unsigned int n_channels // Number of output channels
- );
-
- /* Get the memory required by a single "kernel" matrix.
- */
- static size_t get_kernel_matrix_size(unsigned int n_input_channels,
- unsigned int n_output_channels);
- static int get_kernel_matrix_stride(unsigned int n_input_channels,
- unsigned int n_output_channels);
-
-      static constexpr int M_BLOCK = 4;  /**< Size of block used by GEMM. */
-      static constexpr int N_BLOCK = 16; /**< Size of block used by GEMM. */
- };
-};
-
-} // namespace winograd
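
The tile arithmetic baked into WinogradGEMM is worth spelling out: an m x n output tile convolved with an r x s kernel reads an (m + r - 1) x (n + s - 1) inner tile, and one GEMM is performed per inner-tile element. A self-contained check of those constants for the common F(2x2, 3x3) and F(4x4, 3x3) configurations:

    // Mirrors the constexpr members of winograd::WinogradGEMM above.
    constexpr int inner_tile(int output_tile, int kernel)
    {
      return output_tile + kernel - 1;
    }
    constexpr int n_gemms(int out_r, int out_c, int k_r, int k_c)
    {
      return inner_tile(out_r, k_r) * inner_tile(out_c, k_c);
    }

    static_assert(inner_tile(2, 3) == 4,     "F(2x2,3x3): 4x4 inner tile");
    static_assert(n_gemms(2, 2, 3, 3) == 16, "F(2x2,3x3): 16 GEMMs");
    static_assert(inner_tile(4, 3) == 6,     "F(4x4,3x3): 6x6 inner tile");
    static_assert(n_gemms(4, 4, 3, 3) == 36, "F(4x4,3x3): 36 GEMMs");
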
diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp
deleted file mode 100644
index ed8fede385..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "arm_gemm_local.hpp"
-#include "arm_gemm.hpp"
-#include "winograd.hpp"
-
-namespace winograd
-{
-
-
-class IWinogradConvolutionLayer
-{
- public:
- virtual ~IWinogradConvolutionLayer() = default;
-
- virtual unsigned int weight_transform_get_window(void) const = 0;
- virtual void weight_transform_run(unsigned int start, unsigned int stop) = 0;
-
- virtual IInputTransform& input_transform(void) = 0; // Expose the input transform
- virtual IOutputTransform& output_transform(void) = 0; // Expose the output transform
- virtual arm_gemm::IGemmCommon *gemm(void) = 0; // Expose the underlying GEMM
-};
-
-/** Example of how to construct an ACL-like interface.
- *
- * Use `get_weight_storage_size`, `get_input_storage_size` and
- * `get_output_storage_size` to allocate memory for the convolution engine.
- * Then create a `WinogradConvolutionLayer`.
- *
- * Initialise the weights using `weights_transform.run(...)`.
- *
- * For each inference:
- * 1. Transform the inputs to the Winograd domain using `input_transform.run(...)`
- * 2. Perform a number of GEMMs using `gemms.run(...)`
- * 3. Transform the output to the spatial domain using `output_transform.run(...)`
- */
-template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols,
- typename TIn, typename TInGEMM, typename TOutGEMM, typename TOut,
- WinogradRoots Roots>
-class WinogradConvolutionLayer : public IWinogradConvolutionLayer
-{
- public:
- using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, Roots>;
- using WeightsTransform = typename WinogradBase::template WeightsTransform<TIn, TInGEMM>;
- using InputTransform = typename WinogradBase::template InputTransform<TIn, TInGEMM>;
- using WinogradConv = typename WinogradBase::template Convolution<TOut, TIn, TInGEMM, TOutGEMM>;
- using OutputTransform = typename WinogradBase::template OutputTransform<TOutGEMM, TOut>;
-
- private:
- static constexpr int InnerTileRows = OutputTileRows + KernelRows - 1;
- static constexpr int InnerTileCols = OutputTileCols + KernelCols - 1;
- static constexpr int N_GEMMS = InnerTileRows * InnerTileCols;
-
- const int _n_output_rows, _n_output_cols;
- const int _kernel_matrix_stride, _kernel_matrix_row_stride;
- const int _input_matrix_stride, _input_matrix_row_stride;
- const int _output_matrix_stride, _output_matrix_row_stride;
- const int _tile_rows, _tile_cols;
- const int _m, _k, _n;
-
- WeightsTransform weights_transform; /** Operator to transform weights to Winograd domain. */
- InputTransform _input_transform; /** Operator to transform input to Winograd domain. */
- const arm_gemm::GemmArgs gemm_args;
- arm_gemm::UniqueGemmCommon<TInGEMM, TOutGEMM> gemms; /** Operator to perform multiple GEMMs. */
- OutputTransform _output_transform; /** Operator to transform output from Winograd domain. */
-
- public:
-
- /** Determine how much memory (in units of TIn) to allocate for the
- * transformed weights.
- */
- static unsigned int get_weight_storage_size(
- const int n_output_channels, /** Number of output feature maps. */
- const int n_input_channels /** Number of input feature maps. */
- );
-
- static unsigned int get_weight_stride(
- const int n_output_channels, /** Number of output feature maps. */
- const int n_input_channels /** Number of input feature maps. */
- );
-
- static unsigned int get_weight_multi_stride(
- const int n_output_channels, /** Number of output feature maps. */
- const int n_input_channels /** Number of input feature maps. */
- );
-
- /** Determine how much memory (in units of TIn) to allocate for the
- * transformed input.
- */
- static unsigned int get_input_storage_size(
- const int n_batches, /** Number of batches in the input tensor. */
- const int n_channels, /** Number of feature maps in the input tensor. */
- const int n_rows, /** Number of rows in each feature map. */
- const int n_cols, /** Number of columns in each feature map. */
- const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
- );
-
- /** Get the row stride for the A matrix in the Winograd domain. */
- static unsigned int get_input_stride(
- const int n_batches, /** Number of batches in the input tensor. */
- const int n_channels, /** Number of feature maps in the input tensor. */
- const int n_rows, /** Number of rows in each feature map. */
- const int n_cols, /** Number of columns in each feature map. */
- const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
- );
-
- /** Get the stride between A matrices in the Winograd domain. */
- static unsigned int get_input_multi_stride(
- const int n_batches, /** Number of batches in the input tensor. */
- const int n_channels, /** Number of feature maps in the input tensor. */
- const int n_rows, /** Number of rows in each feature map. */
- const int n_cols, /** Number of columns in each feature map. */
- const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
- );
-
- /** Determine how much memory (in units of TOut) to allocate for the
- * (Winograd domain) output.
- */
- static unsigned int get_output_storage_size(
- const int n_batches, /** Number of batches in the output tensor. */
- const int n_rows, /** Number of rows in each feature map of the input tensor. */
- const int n_cols, /** Number of columns in each feature map of the input tensor. */
- const int n_output_channels, /** Number of feature maps in the output tensor. */
- const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
- );
-
- static unsigned int get_output_stride(
- const int n_batches, /** Number of batches in the output tensor. */
- const int n_rows, /** Number of rows in each feature map of the input tensor. */
- const int n_cols, /** Number of columns in each feature map of the input tensor. */
- const int n_output_channels, /** Number of feature maps in the output tensor. */
- const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
- );
-
- static unsigned int get_output_multi_stride(
- const int n_batches, /** Number of batches in the output tensor. */
- const int n_rows, /** Number of rows in each feature map of the input tensor. */
- const int n_cols, /** Number of columns in each feature map of the input tensor. */
- const int n_output_channels, /** Number of feature maps in the output tensor. */
- const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
- );
-
- /** Get the shape (rows, cols) of a feature map of the output tensor. */
- static std::pair<int, int> get_output_feature_map_shape(
- const int n_input_rows, /** Number of rows in the input feature map. */
- const int n_input_cols, /** Number of columns in the input feature map. */
- const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
- );
-
- /** Create a new Winograd convolution layer.
- */
- WinogradConvolutionLayer(
- const arm_gemm::CPUInfo &cpuinfo, /** Describes CPU properties. */
- const int n_threads, /** Maximum number of threads used to execute the convolution. */
- const int n_batches, /** Number of batches in the input and output tensors. */
- const int n_input_channels, /** Number of feature maps in a batch of the input tensor. */
- const int n_input_rows, /** Number of rows in a feature map of the input tensor. */
- const int n_input_cols, /** Number of columns in a feature map of the input tensor. */
- const int n_output_channels, /** Number of feature maps in the output tensor. */
- const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */
- const arm_gemm::Activation &activation,
-      const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Width x Input Feature Maps x Output Feature Maps". */
- TInGEMM* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */
- const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */
- TInGEMM* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */
- const TOut* const biases, /** Pointer to biases vector. Pass nullptr if no bias is provided. */
- TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */
- TOutGEMM* const winograd_output, /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */
- const bool pretranspose_B=true, /** Hint that the B matrix can be pretransposed. */
- arm_gemm::GemmConfig *gemm_cfg=nullptr /** Pointer to GEMM configuration. */
- );
-
- /* Utility methods for interacting with the layer. */
- unsigned int weight_transform_get_window(void) const;
- void weight_transform_run(const unsigned int start, const unsigned int stop);
-
- IInputTransform& input_transform(void);
- IOutputTransform& output_transform(void);
-
- /* Get a pointer to the GEMM underlying the Winograd transform. */
- arm_gemm::IGemmCommon *gemm(void);
-};
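-
-/* Illustrative usage (a sketch, not part of the original header; `Layer` is a
- * concrete instantiation, `alloc<T>` a caller-provided allocator, and argument
- * lists are abbreviated):
- *
- *   TInGEMM *wino_in = alloc<TInGEMM>(Layer::get_input_storage_size(...));
- *   TOutGEMM *wino_out = alloc<TOutGEMM>(Layer::get_output_storage_size(...));
- *   Layer layer(cpuinfo, n_threads, ..., weights, weight_store,
- *               input, wino_in, biases, output, wino_out);
- *   layer.weight_transform_run(0, layer.weight_transform_get_window());
- */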
-
-}
diff --git a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
deleted file mode 100644
index 4861559695..0000000000
--- a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_DETAIL_NEACTIVATION_FUNCTION_DETAIL_H
-#define ARM_COMPUTE_DETAIL_NEACTIVATION_FUNCTION_DETAIL_H
-
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-
-namespace arm_compute
-{
-namespace detail
-{
-/** Dummy activation object */
-template <typename T, int S>
-struct dummy
-{
- /** NEON vector type. */
- using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
-
- /** Construct a dummy activation object.
- *
- * @param[in] act_info Activation layer information.
- */
- explicit dummy(ActivationLayerInfo act_info)
- {
- ARM_COMPUTE_UNUSED(act_info);
- }
- /** Run activation function.
- *
- * @param[in] vval Vector of values.
- */
- void operator()(ExactType &vval)
- {
- ARM_COMPUTE_UNUSED(vval);
- }
-};
-/** Linear activation object */
-template <typename T, int S>
-struct linear
-{
- /** NEON vector type. */
- using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
-
- /** Construct a Linear activation object.
- *
- * @param[in] act_info Activation layer information.
- */
- explicit linear(ActivationLayerInfo act_info)
- : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
- vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{}))
- {
- }
-
- /** Run activation function.
- *
- * @param[in] vval Vector of values.
- */
- void operator()(ExactType &vval)
- {
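- // LINEAR: f(x) = a * x + b, computed as vmla(b, a, x) = b + a * x.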
- vval = wrapper::vmla(vbeta, valpha, vval);
- }
-
- /** Vector of alphas. */
- const ExactType valpha;
- /** Vector of betas. */
- const ExactType vbeta;
-};
-/** Square activation object */
-template <typename T, int S>
-struct square
-{
- /** NEON vector type. */
- using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
-
- /** Construct a Square activation object.
- *
- * @param[in] act_info Activation layer information.
- */
- explicit square(ActivationLayerInfo act_info)
- {
- ARM_COMPUTE_UNUSED(act_info);
- }
-
- /** Run activation function.
- *
- * @param[in] vval Vector of values.
- */
- void operator()(ExactType &vval)
- {
- vval = wrapper::vmul(vval, vval);
- }
-};
-/** Logistic activation object */
-template <typename T, int S>
-struct logistic
-{
- /** NEON vector type. */
- using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
-
- /** Construct a Logistic activation object.
- *
- * @param[in] act_info Activation layer information.
- */
- explicit logistic(ActivationLayerInfo act_info)
- : vone(wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{}))
- {
- ARM_COMPUTE_UNUSED(act_info);
- }
-
- /** Run activation function.
- *
- * @param[in] vval Vector of values.
- */
- void operator()(ExactType &vval)
- {
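- // LOGISTIC: f(x) = 1 / (1 + exp(-x)).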
- vval = wrapper::vinv(wrapper::vadd(vone, wrapper::vexpq(wrapper::vneg(vval))));
- }
-
- /** Vector of ones. */
- const ExactType vone;
-};
-/** RELU activation object */
-template <typename T, int S>
-struct relu
-{
- /** NEON vector type. */
- using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
-
- /** Construct a RELU activation object.
- *
- * @param[in] act_info Activation layer information.
- */
- explicit relu(ActivationLayerInfo act_info)
- : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}))
- {
- ARM_COMPUTE_UNUSED(act_info);
- }
-
- /** Run activation function.
- *
- * @param[in] vval Vector of values.
- */
- void operator()(ExactType &vval)
- {
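- // RELU: f(x) = max(0, x).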
- vval = wrapper::vmax(vzero, vval);
- }
-
- /** Vector of zeroes. */
- const ExactType vzero;
-};
-/** Bounded RELU activation object */
-template <typename T, int S>
-struct brelu
-{
- /** NEON vector type. */
- using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
-
- /** Construct a bounded RELU activation object.
- *
- * @param[in] act_info Activation layer information.
- */
- explicit brelu(ActivationLayerInfo act_info)
- : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})),
- valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{}))
- {
- }
-
- /** Run activation function.
- *
- * @param[in] vval Vector of values.
- */
- void operator()(ExactType &vval)
- {
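- // Bounded RELU: f(x) = min(a, max(0, x)).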
- vval = wrapper::vmin(valpha, wrapper::vmax(vzero, vval));
- }
-
- /** Vector of zeroes. */
- const ExactType vzero;
- /** Vector of alphas. */
- const ExactType valpha;
-};
-/** Lower-Upper Bounded RELU activation object */
-template <typename T, int S>
-struct lubrelu
-{
- /** NEON vector type. */
- using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
-
- /** Construct a lower-upper bounded RELU activation object.
- *
- * @param[in] act_info Activation layer information.
- */
- explicit lubrelu(ActivationLayerInfo act_info)
- : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
- vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{}))
- {
- }
-
- /** Run activation function.
- *
- * @param[in] vval Vector of values.
- */
- void operator()(ExactType &vval)
- {
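- // Lower-upper bounded RELU: f(x) = min(a, max(b, x)).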
- vval = wrapper::vmin(valpha, wrapper::vmax(vbeta, vval));
- }
-
- /** Vector of alphas. */
- const ExactType valpha;
- /** Vector of betas. */
- const ExactType vbeta;
-};
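-/* Illustrative usage (a sketch; the surrounding kernel loop and a populated
- * `act_info` are assumed): a functor is built once per kernel run and then
- * applied in place to each vector of a tile, e.g.
- *
- *   detail::relu<float, 4> act(act_info); // ExactType == float32x4_t
- *   float32x4_t v = vld1q_f32(in_ptr);
- *   act(v); // v = max(0, v)
- *   vst1q_f32(out_ptr, v);
- */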
-} // namespace detail
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_DETAIL_NEACTIVATION_FUNCTION_DETAIL_H */
diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h
deleted file mode 100644
index d756a9a98f..0000000000
--- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL3x3_H
-#define ARM_COMPUTE_NECONVOLUTIONKERNEL3x3_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace detail
-{
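-/* Load one row of a 3x3 filter: each of its three coefficients is broadcast
- across all lanes of its own vector (vld1q_dup_f32), so the row can be
- multiplied against sliding input windows without further shuffling. */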
-inline float32x4x3_t load_matrix_row(const float *ptr)
-{
- const float32x4x3_t r =
- {
- {
- vld1q_dup_f32(ptr),
- vld1q_dup_f32(1 + ptr),
- vld1q_dup_f32(2 + ptr)
- }
- };
- return r;
-}
-
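-/* Convolve three input rows against a broadcast 3x3 filter, producing two
- float32x4_t vectors (8 consecutive stride-1 outputs); the stride-2 and
- stride-3 specialisations below compact the lanes they keep. */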
-template <unsigned int stridex>
-float32x4x2_t convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2);
-
-template <>
-inline float32x4x2_t convolve_3x3<1>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2)
-{
- const float32x4x3_t vtop =
- {
- {
- vld1q_f32(in_top),
- vld1q_f32(in_top + 4),
- vld1q_f32(in_top + 8)
- }
- };
- const float32x4x3_t vmid =
- {
- {
- vld1q_f32(in_mid),
- vld1q_f32(in_mid + 4),
- vld1q_f32(in_mid + 8)
- }
- };
- const float32x4x3_t vlow =
- {
- {
- vld1q_f32(in_low),
- vld1q_f32(in_low + 4),
- vld1q_f32(in_low + 8)
- }
- };
- float32x4x2_t out =
- {
- {
- vmulq_f32(vtop.val[0], m0.val[0]),
- vmulq_f32(vtop.val[1], m0.val[0])
- }
- };
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 1), m0.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 2), m0.val[2]);
-
- out.val[0] = vmlaq_f32(out.val[0], vmid.val[0], m1.val[0]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 1), m1.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 2), m1.val[2]);
-
- out.val[0] = vmlaq_f32(out.val[0], vlow.val[0], m2.val[0]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 1), m2.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 2), m2.val[2]);
-
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 1), m0.val[1]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 2), m0.val[2]);
-
- out.val[1] = vmlaq_f32(out.val[1], vmid.val[1], m1.val[0]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 1), m1.val[1]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 2), m1.val[2]);
-
- out.val[1] = vmlaq_f32(out.val[1], vlow.val[1], m2.val[0]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 1), m2.val[1]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 2), m2.val[2]);
- return out;
-}
-
-template <>
-inline float32x4x2_t convolve_3x3<2>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2)
-{
- float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
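- // Stride 2 keeps every other stride-1 output: lanes 0 and 2 of each half
- // are packed into lanes 0..3 of out.val[0].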
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 2), out.val[0], 1);
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 0), out.val[0], 2);
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 2), out.val[0], 3);
- return out;
-}
-
-template <>
-inline float32x4x2_t convolve_3x3<3>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2)
-{
- float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
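- // Stride 3 keeps stride-1 outputs 0 and 3: lane 3 is moved next to lane 0
- // ahead of the two-element store.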
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1);
- return out;
-}
-
-template <unsigned int stridex>
-void store_results(float *buffer, const float32x4x2_t &values);
-
-template <>
-void store_results<1>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, values.val[0]);
- vst1q_f32(buffer + 4, values.val[1]);
-}
-
-template <>
-void store_results<2>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, values.val[0]);
-}
-
-template <>
-void store_results<3>(float *buffer, const float32x4x2_t &values)
-{
- vst1_f32(buffer, vget_low_f32(values.val[0]));
-}
-
-template <unsigned int stridex>
-int get_input_num_elems_processed(unsigned int num_elems_written_per_iteration);
-
-template <>
-int get_input_num_elems_processed<1>(unsigned int num_elems_written_per_iteration)
-{
- return num_elems_written_per_iteration;
-}
-
-template <>
-int get_input_num_elems_processed<2>(unsigned int num_elems_written_per_iteration)
-{
- return num_elems_written_per_iteration << 1;
-}
-
-template <>
-int get_input_num_elems_processed<3>(unsigned int num_elems_written_per_iteration)
-{
- return num_elems_written_per_iteration * 3;
-}
-} // namespace detail
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECONVOLUTIONKERNEL3x3_H */ \ No newline at end of file
diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
deleted file mode 100644
index d4cbc7f4af..0000000000
--- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
+++ /dev/null
@@ -1,965 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H
-#define ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/NEON/NEFixedPoint.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/utils/misc/Requires.h"
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace detail
-{
-/** Loads a 3x3 matrix as a row (float).
- *
- * @param[in] ptr Pointer to a float 3x3 matrix.
- * @param[in] weights_offset (Optional) Weights quantization offset.
- *
- * @return The loaded matrix.
- */
-inline float32x4x3_t load_matrix_row(const float *ptr, int weights_offset = 0)
-{
- ARM_COMPUTE_UNUSED(weights_offset);
- const float32x4x3_t r =
- {
- {
- vld1q_dup_f32(ptr),
- vld1q_dup_f32(1 + ptr),
- vld1q_dup_f32(2 + ptr)
- }
- };
- return r;
-}
-
-/** Loads a 3x3 matrix as a row (uint8_t/int8_t).
- *
- * @param[in] ptr Pointer to a uint8_t/int8_t 3x3 matrix.
- * @param[in] weights_offset (Optional) Weights quantization offset.
- *
- * @return The loaded matrix.
- */
-template < typename T, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
-inline int32x4x3_t load_matrix_row(const T *ptr, int weights_offset = 0)
-{
- const int32x4_t v_weights_offset = vdupq_n_s32(weights_offset);
-
- /* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes:
- r.val[0] contains the first element, r.val[1] the second element and r.val[2] the third element (in all lanes) */
- int32x4x3_t r =
- {
- {
- vaddq_s32(v_weights_offset, vdupq_n_s32(*ptr)),
- vaddq_s32(v_weights_offset, vdupq_n_s32(*(ptr + 1))),
- vaddq_s32(v_weights_offset, vdupq_n_s32(*(ptr + 2)))
- }
- };
- return r;
-}
-
-/** Stores a float32x4x2_t array into a memory location.
- *
- * @param[in] buffer Pointer to the memory location where the values will be stored.
- * @param[in] values Values that will be stored.
- *
- */
-template <unsigned int stridex>
-void store_results(float *buffer, const float32x4x2_t &values);
-
-template <>
-inline void store_results<1>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, values.val[0]);
- vst1q_f32(buffer + 4, values.val[1]);
-}
-
-template <>
-inline void store_results<2>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, values.val[0]);
-}
-
-template <>
-inline void store_results<3>(float *buffer, const float32x4x2_t &values)
-{
- vst1_f32(buffer, vget_low_f32(values.val[0]));
-}
-
-/** Stores an int32x4x2_t array into a memory location.
- *
- * @param[in] buffer Pointer to the memory location where the values will be stored.
- * @param[in] values Values that will be stored.
- *
- */
-template <unsigned int stridex>
-void store_results(int32_t *buffer, const int32x4x2_t &values);
-
-template <>
-inline void store_results<1>(int32_t *buffer, const int32x4x2_t &values)
-{
- vst1q_s32(buffer, values.val[0]);
- vst1q_s32(buffer + 4, values.val[1]);
-}
-
-template <>
-inline void store_results<2>(int32_t *buffer, const int32x4x2_t &values)
-{
- vst1q_s32(buffer, values.val[0]);
-}
-
-template <>
-inline void store_results<3>(int32_t *buffer, const int32x4x2_t &values)
-{
- vst1_s32(buffer, vget_low_s32(values.val[0]));
-}
-
-template <unsigned int stridex>
-inline void accumulate_results(float *buffer, const float32x4x2_t &values);
-
-template <>
-inline void accumulate_results<1>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, vaddq_f32(vld1q_f32(buffer), values.val[0]));
- vst1q_f32(buffer + 4, vaddq_f32(vld1q_f32(buffer + 4), values.val[1]));
-}
-
-template <>
-inline void accumulate_results<2>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, vaddq_f32(vld1q_f32(buffer), values.val[0]));
-}
-
-template <>
-inline void accumulate_results<3>(float *buffer, const float32x4x2_t &values)
-{
- vst1_f32(buffer, vadd_f32(vld1_f32(buffer), vget_low_f32(values.val[0])));
-}
-
-template <unsigned int stridex>
-void accumulate_results(int32_t *buffer, const int32x4x2_t &values);
-
-template <>
-inline void accumulate_results<1>(int32_t *buffer, const int32x4x2_t &values)
-{
- vst1q_s32(buffer, vaddq_s32(vld1q_s32(buffer), values.val[0]));
- vst1q_s32(buffer + 4, vaddq_s32(vld1q_s32(buffer + 4), values.val[1]));
-}
-
-template <>
-inline void accumulate_results<2>(int32_t *buffer, const int32x4x2_t &values)
-{
- vst1q_s32(buffer, vaddq_s32(vld1q_s32(buffer), values.val[0]));
-}
-
-template <>
-inline void accumulate_results<3>(int32_t *buffer, const int32x4x2_t &values)
-{
- vst1_s32(buffer, vadd_s32(vld1_s32(buffer), vget_low_s32(values.val[0])));
-}
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** Stores a float16x8x2_t array into a memory location.
- *
- * @param[in] buffer Pointer to the memory location where the values will be stored.
- * @param[in] values Values that will be stored.
- *
- */
-template <unsigned int stridex>
-void store_results(float16_t *buffer, const float16x8x2_t &values);
-
-template <>
-inline void store_results<1>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1q_f16(buffer, values.val[0]);
- vst1q_f16(buffer + 8, values.val[1]);
-}
-
-template <>
-inline void store_results<2>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1q_f16(buffer, values.val[0]);
-}
-
-template <>
-inline void store_results<3>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1_f16(buffer, vget_low_f16(values.val[0]));
-}
-
-template <unsigned int stridex>
-inline void accumulate_results(float16_t *buffer, const float16x8x2_t &values);
-
-template <>
-inline void accumulate_results<1>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1q_f16(buffer, vaddq_f16(vld1q_f16(buffer), values.val[0]));
- vst1q_f16(buffer + 8, vaddq_f16(vld1q_f16(buffer + 8), values.val[1]));
-}
-
-template <>
-inline void accumulate_results<2>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1q_f16(buffer, vaddq_f16(vld1q_f16(buffer), values.val[0]));
-}
-
-template <>
-inline void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0])));
-}
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-/** Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] dilation_x Dilation, in elements across x.
- * @param[in] input_offset (Optional) Input quantization offset.
- *
- */
-inline float32x4_t single_convolve_3x3_dilation(const float *in_top, const float *in_mid, const float *in_low,
- const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- const size_t dilation_x, int input_offset)
-{
- ARM_COMPUTE_UNUSED(input_offset);
-
- const float32x4x3_t vtop =
- {
- {
- vld1q_f32(in_top),
- vld1q_f32(in_top + dilation_x),
- vld1q_f32(in_top + 2 * dilation_x)
- }
- };
- const float32x4x3_t vmid =
- {
- {
- vld1q_f32(in_mid),
- vld1q_f32(in_mid + dilation_x),
- vld1q_f32(in_mid + 2 * dilation_x)
- }
- };
- const float32x4x3_t vlow =
- {
- {
- vld1q_f32(in_low),
- vld1q_f32(in_low + dilation_x),
- vld1q_f32(in_low + 2 * dilation_x)
- }
- };
- float32x4_t out = vmulq_f32(vtop.val[0], m0.val[0]);
- out = vmlaq_f32(out, vtop.val[1], m0.val[1]);
- out = vmlaq_f32(out, vtop.val[2], m0.val[2]);
-
- out = vmlaq_f32(out, vmid.val[0], m1.val[0]);
- out = vmlaq_f32(out, vmid.val[1], m1.val[1]);
- out = vmlaq_f32(out, vmid.val[2], m1.val[2]);
-
- out = vmlaq_f32(out, vlow.val[0], m2.val[0]);
- out = vmlaq_f32(out, vlow.val[1], m2.val[1]);
- out = vmlaq_f32(out, vlow.val[2], m2.val[2]);
-
- return out;
-}
-
-/** Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] dilation_x Dilation, in elements across x.
- * @param[in] stridex Stride value in elements across x.
- * @param[in] input_offset (Optional) Input quantization offset.
- *
- */
-inline float32x4x2_t convolve_3x3_dilation(const float *in_top, const float *in_mid, const float *in_low,
- const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- const size_t dilation_x, unsigned int stridex, int input_offset = 0)
-{
- ARM_COMPUTE_ERROR_ON(stridex > 3);
- float32x4x2_t out =
- {
- {
- single_convolve_3x3_dilation(in_top, in_mid, in_low, m0, m1, m2, dilation_x, input_offset),
- single_convolve_3x3_dilation(in_top + 4, in_mid + 4, in_low + 4, m0, m1, m2, dilation_x, input_offset)
- }
- };
-
- if(stridex == 2)
- {
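- // Compact the 8 stride-1 results down to the 4 stride-2 outputs
- // (lanes 0 and 2 of each half).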
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 2), out.val[0], 1);
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 0), out.val[0], 2);
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 2), out.val[0], 3);
- }
- else if(stridex == 3)
- {
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1);
- }
-
- return out;
-}
-
-/** Perform a convolve3x3 on float32.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[out] out_ptr Pointer to the output.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] stridex Stride value in elements across x.
- * @param[in] input_offset (Optional) Input quantization offset.
- *
- */
-template <bool accumulate>
-void convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, float *out_ptr,
- const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- unsigned int stridex, int input_offset = 0);
-
-template <bool accumulate>
-inline void convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, float *out_ptr,
- const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- unsigned int stridex, int input_offset)
-{
- ARM_COMPUTE_UNUSED(input_offset);
- ARM_COMPUTE_ERROR_ON(stridex > 3);
-
- float32x4x2_t out =
- {
- {
- vdupq_n_f32(0.f),
- vdupq_n_f32(0.f)
- }
- };
- if(stridex == 2)
- {
- const float32x4x2_t vtop = vld2q_f32(in_top);
- const float32x4x2_t vmid = vld2q_f32(in_mid);
- const float32x4x2_t vlow = vld2q_f32(in_low);
- const float32x4_t vtop_end = vld1q_f32(in_top + 8);
- const float32x4_t vmid_end = vld1q_f32(in_mid + 8);
- const float32x4_t vlow_end = vld1q_f32(in_low + 8);
-
- out.val[0] = vmulq_f32(vtop.val[0], m0.val[0]);
-
- out.val[0] = vmlaq_f32(out.val[0], vtop.val[1], m0.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop_end, 1), m0.val[2]);
-
- out.val[0] = vmlaq_f32(out.val[0], vmid.val[0], m1.val[0]);
- out.val[0] = vmlaq_f32(out.val[0], vmid.val[1], m1.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid_end, 1), m1.val[2]);
-
- out.val[0] = vmlaq_f32(out.val[0], vlow.val[0], m2.val[0]);
- out.val[0] = vmlaq_f32(out.val[0], vlow.val[1], m2.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow_end, 1), m2.val[2]);
-
- accumulate ? accumulate_results<2>(out_ptr, out) : store_results<2>(out_ptr, out);
- }
- else
- {
- const float32x4x3_t vtop =
- {
- {
- vld1q_f32(in_top),
- vld1q_f32(in_top + 4),
- vld1q_f32(in_top + 8)
- }
- };
- const float32x4x3_t vmid =
- {
- {
- vld1q_f32(in_mid),
- vld1q_f32(in_mid + 4),
- vld1q_f32(in_mid + 8)
- }
- };
- const float32x4x3_t vlow =
- {
- {
- vld1q_f32(in_low),
- vld1q_f32(in_low + 4),
- vld1q_f32(in_low + 8)
- }
- };
- out.val[0] = vmulq_f32(vtop.val[0], m0.val[0]);
- out.val[1] = vmulq_f32(vtop.val[1], m0.val[0]);
-
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 1), m0.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 2), m0.val[2]);
-
- out.val[0] = vmlaq_f32(out.val[0], vmid.val[0], m1.val[0]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 1), m1.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 2), m1.val[2]);
-
- out.val[0] = vmlaq_f32(out.val[0], vlow.val[0], m2.val[0]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 1), m2.val[1]);
- out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 2), m2.val[2]);
-
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 1), m0.val[1]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 2), m0.val[2]);
-
- out.val[1] = vmlaq_f32(out.val[1], vmid.val[1], m1.val[0]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 1), m1.val[1]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 2), m1.val[2]);
-
- out.val[1] = vmlaq_f32(out.val[1], vlow.val[1], m2.val[0]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 1), m2.val[1]);
- out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 2), m2.val[2]);
-
- if(stridex == 3)
- {
- out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1);
- accumulate ? accumulate_results<3>(out_ptr, out) : store_results<3>(out_ptr, out);
- }
- else
- {
- accumulate ? accumulate_results<1>(out_ptr, out) : store_results<1>(out_ptr, out);
- }
- }
-}
-
-/** Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] dilation_x Dilation, in elements across x.
- * @param[in] input_offset Input quantization offset.
- *
- */
-template < typename T, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
-inline int32x4_t single_convolve_3x3_dilation(const T *in_top, const T *in_mid, const T *in_low,
- const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2,
- size_t dilation_x, int32_t input_offset)
-{
- using VectorType = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x3_t, int8x8x3_t>::type;
- using OutputTagType = typename wrapper::traits::neon_bitvector_tag_t<int32_t, wrapper::traits::BitWidth::W128>;
-
- const int32x4_t v_input_offset = wrapper::vdup_n(input_offset, OutputTagType{});
-
- const VectorType vtop =
- {
- {
- wrapper::vload(in_top),
- wrapper::vload(in_top + dilation_x),
- wrapper::vload(in_top + 2 * dilation_x)
- }
- };
- const VectorType vmid =
- {
- {
- wrapper::vload(in_mid),
- wrapper::vload(in_mid + dilation_x),
- wrapper::vload(in_mid + 2 * dilation_x)
- }
- };
- const VectorType vlow =
- {
- {
- wrapper::vload(in_low),
- wrapper::vload(in_low + dilation_x),
- wrapper::vload(in_low + 2 * dilation_x)
- }
- };
-
- const int32x4x3_t vtop_s32 =
- {
- {
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[1])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[2])))),
- }
- };
- const int32x4x3_t vmid_s32 =
- {
- {
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[1])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[2])))),
- }
- };
- const int32x4x3_t vlow_s32 =
- {
- {
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[1])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[2])))),
- }
- };
-
- int32x4_t out = wrapper::vmul(vtop_s32.val[0], m0.val[0]);
- out = wrapper::vmla(out, vtop_s32.val[1], m0.val[1]);
- out = wrapper::vmla(out, vtop_s32.val[2], m0.val[2]);
-
- out = wrapper::vmla(out, vmid_s32.val[0], m1.val[0]);
- out = wrapper::vmla(out, vmid_s32.val[1], m1.val[1]);
- out = wrapper::vmla(out, vmid_s32.val[2], m1.val[2]);
-
- out = wrapper::vmla(out, vlow_s32.val[0], m2.val[0]);
- out = wrapper::vmla(out, vlow_s32.val[1], m2.val[1]);
- out = wrapper::vmla(out, vlow_s32.val[2], m2.val[2]);
-
- return out;
-}
-
-/** Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] dilation_x Dilation, in elements across x.
- * @param[in] stridex Stride value in elements across x.
- * @param[in] input_offset Input quantization offset.
- *
- */
-template < typename T, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
-inline int32x4x2_t convolve_3x3_dilation(const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2,
- const size_t dilation_x, unsigned int stridex, int input_offset)
-{
- ARM_COMPUTE_ERROR_ON(stridex > 3);
- int32x4x2_t out =
- {
- {
- single_convolve_3x3_dilation(in_top, in_mid, in_low, m0, m1, m2, dilation_x, input_offset),
- single_convolve_3x3_dilation(in_top + 4, in_mid + 4, in_low + 4, m0, m1, m2, dilation_x, input_offset)
- }
- };
-
- if(stridex == 2)
- {
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 2), out.val[0], 1);
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 0), out.val[0], 2);
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 2), out.val[0], 3);
- }
- else if(stridex == 3)
- {
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 3), out.val[0], 1);
- }
- return out;
-}
-
-/** Perform a convolve3x3 on 8-bit elements
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[out] out_ptr Pointer to the output.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] stridex Stride value in elements across x.
- * @param[in] input_offset Input quantization offset.
- *
- */
-template < bool accumulate, typename T1, typename T2, REQUIRES_TA(std::is_same<T1, uint8_t>::value || std::is_same<T1, int8_t>::value) >
-void convolve_3x3(const T1 *in_top, const T1 *in_mid, const T1 *in_low, T2 *out_ptr,
- const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2,
- unsigned int stridex, int32_t input_offset)
-{
- ARM_COMPUTE_ERROR_ON(stridex > 3);
- using VectorType = typename std::conditional<std::is_same<T1, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
- using OutputTagType = typename wrapper::traits::neon_bitvector_tag_t<int32_t, wrapper::traits::BitWidth::W128>;
-
- const int32x4_t v_input_offset = wrapper::vdup_n(input_offset, OutputTagType{});
-
- const VectorType vtop =
- {
- {
- wrapper::vload(in_top),
- wrapper::vload(in_top + 8)
- }
- };
- const VectorType vmid =
- {
- {
- wrapper::vload(in_mid),
- wrapper::vload(in_mid + 8)
- }
- };
- const VectorType vlow =
- {
- {
- wrapper::vload(in_low),
- wrapper::vload(in_low + 8)
- }
- };
-
- const int32x4x3_t vtop_s32 =
- {
- {
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgethigh(wrapper::vmovl(vtop.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[1])))),
- }
- };
- const int32x4x3_t vmid_s32 =
- {
- {
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgethigh(wrapper::vmovl(vmid.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[1])))),
- }
- };
- const int32x4x3_t vlow_s32 =
- {
- {
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgethigh(wrapper::vmovl(vlow.val[0])))),
- wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[1])))),
- }
- };
-
- int32x4x2_t out
- {
- {
- wrapper::vdup_n(static_cast<int32_t>(0), OutputTagType{}),
- wrapper::vdup_n(static_cast<int32_t>(0), OutputTagType{}),
- }
- };
-
- // 0
- out.val[0] = wrapper::vmla(out.val[0], vtop_s32.val[0], m0.val[0]);
- out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_1(vtop_s32.val[0], vtop_s32.val[1]), m0.val[1]);
- out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_2(vtop_s32.val[0], vtop_s32.val[1]), m0.val[2]);
-
- out.val[0] = wrapper::vmla(out.val[0], vmid_s32.val[0], m1.val[0]);
- out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_1(vmid_s32.val[0], vmid_s32.val[1]), m1.val[1]);
- out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_2(vmid_s32.val[0], vmid_s32.val[1]), m1.val[2]);
-
- out.val[0] = wrapper::vmla(out.val[0], vlow_s32.val[0], m2.val[0]);
- out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_1(vlow_s32.val[0], vlow_s32.val[1]), m2.val[1]);
- out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_2(vlow_s32.val[0], vlow_s32.val[1]), m2.val[2]);
-
- // 1
- out.val[1] = wrapper::vmla(out.val[1], vtop_s32.val[1], m0.val[0]);
- out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_1(vtop_s32.val[1], vtop_s32.val[2]), m0.val[1]);
- out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_2(vtop_s32.val[1], vtop_s32.val[2]), m0.val[2]);
-
- out.val[1] = wrapper::vmla(out.val[1], vmid_s32.val[1], m1.val[0]);
- out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_1(vmid_s32.val[1], vmid_s32.val[2]), m1.val[1]);
- out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_2(vmid_s32.val[1], vmid_s32.val[2]), m1.val[2]);
-
- out.val[1] = wrapper::vmla(out.val[1], vlow_s32.val[1], m2.val[0]);
- out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_1(vlow_s32.val[1], vlow_s32.val[2]), m2.val[1]);
- out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_2(vlow_s32.val[1], vlow_s32.val[2]), m2.val[2]);
-
- if(stridex == 1)
- {
- accumulate ? accumulate_results<1>(out_ptr, out) : store_results<1>(out_ptr, out);
- }
- else if(stridex == 2)
- {
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 2), out.val[0], 1);
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 0), out.val[0], 2);
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 2), out.val[0], 3);
-
- accumulate ? accumulate_results<2>(out_ptr, out) : store_results<2>(out_ptr, out);
- }
- else if(stridex == 3)
- {
- out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 3), out.val[0], 1);
- accumulate ? accumulate_results<3>(out_ptr, out) : store_results<3>(out_ptr, out);
- }
-}
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** Loads a 3x3 matrix as a row (float16_t).
- *
- * @param[in] ptr Pointer to a float 3x3 matrix.
- *
- * @return The loaded matrix.
- */
-inline float16x8x3_t load_matrix_row(const float16_t *ptr, int weights_offset = 0)
-{
- ARM_COMPUTE_UNUSED(weights_offset);
- /* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes:
- r.val[0] contains the first element, r.val[1] the second element and r.val[2] the third element (in all lanes) */
- const float16x8x3_t r =
- {
- {
- vld1q_dup_f16(ptr),
- vld1q_dup_f16(1 + ptr),
- vld1q_dup_f16(2 + ptr)
- }
- };
- return r;
-}
-
-/** Perform a 3x3 convolution for 8 consecutive elements on float16 when dilation.x() or dilation.y() is not 1.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] dilation_x Dilation, in elements across x.
- * @param[in] input_offset (Optional) Input quantization offset.
- *
- */
-inline float16x8_t single_convolve_3x3_dilation(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low,
- const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2,
- const size_t dilation_x, int input_offset = 0)
-{
- ARM_COMPUTE_UNUSED(input_offset);
- const float16x8x3_t vtop =
- {
- {
- vld1q_f16(in_top),
- vld1q_f16(in_top + dilation_x),
- vld1q_f16(in_top + 2 * dilation_x)
- }
- };
- const float16x8x3_t vmid =
- {
- {
- vld1q_f16(in_mid),
- vld1q_f16(in_mid + dilation_x),
- vld1q_f16(in_mid + 2 * dilation_x)
- }
- };
- const float16x8x3_t vlow =
- {
- {
- vld1q_f16(in_low),
- vld1q_f16(in_low + dilation_x),
- vld1q_f16(in_low + 2 * dilation_x)
- }
- };
- float16x8_t out = vmulq_f16(vtop.val[0], m0.val[0]);
- out = vaddq_f16(out, vmulq_f16(vtop.val[1], m0.val[1]));
- out = vaddq_f16(out, vmulq_f16(vtop.val[2], m0.val[2]));
-
- out = vaddq_f16(out, vmulq_f16(vmid.val[0], m1.val[0]));
- out = vaddq_f16(out, vmulq_f16(vmid.val[1], m1.val[1]));
- out = vaddq_f16(out, vmulq_f16(vmid.val[2], m1.val[2]));
-
- out = vaddq_f16(out, vmulq_f16(vlow.val[0], m2.val[0]));
- out = vaddq_f16(out, vmulq_f16(vlow.val[1], m2.val[1]));
- out = vaddq_f16(out, vmulq_f16(vlow.val[2], m2.val[2]));
-
- return out;
-}
-
-/** Perform a 3x3 convolution for 16 consecutive elements on float16 when dilation.x() or dilation.y() is not 1.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] dilation_x Dilation, in elements across x.
- * @param[in] stridex Stride value in elements across x.
- * @param[in] input_offset (Optional) Input quantization offset.
- *
- */
-inline float16x8x2_t convolve_3x3_dilation(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low,
- const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2,
- const size_t dilation_x, unsigned int stridex, int input_offset = 0)
-{
- float16x8x2_t out =
- {
- {
- single_convolve_3x3_dilation(in_top, in_mid, in_low, m0, m1, m2, dilation_x, input_offset),
- single_convolve_3x3_dilation(in_top + 8, in_mid + 8, in_low + 8, m0, m1, m2, dilation_x, input_offset)
- }
- };
-
- if(stridex == 2)
- {
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 2), out.val[0], 1);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 4), out.val[0], 2);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 3);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 0), out.val[0], 4);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 2), out.val[0], 5);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 4), out.val[0], 6);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 6), out.val[0], 7);
- }
- else if(stridex == 3)
- {
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 3), out.val[0], 1);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 2);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 1), out.val[0], 3);
- }
-
- return out;
-}
-
-/** Perform a convolve3x3 on float16.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[out] out_ptr Pointer to the output.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] stridex Stride value in elements across x.
- * @param[in] input_offset (Optional) Input quantization offset.
- *
- */
-template <bool accumulate>
-inline void convolve_3x3(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, float16_t *out_ptr,
- const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2,
- unsigned int stridex, int input_offset = 0)
-{
- ARM_COMPUTE_UNUSED(input_offset);
-
- float16x8x2_t out =
- {
- {
- vdupq_n_f16(0),
- vdupq_n_f16(0)
- }
- };
- if(stridex == 2)
- {
- const float16x8x2_t vtop = vld2q_f16(in_top);
- const float16x8x2_t vmid = vld2q_f16(in_mid);
- const float16x8x2_t vlow = vld2q_f16(in_low);
- const float16x8_t vtop_end = vld1q_f16(in_top + 16);
- const float16x8_t vmid_end = vld1q_f16(in_mid + 16);
- const float16x8_t vlow_end = vld1q_f16(in_low + 16);
-
- out.val[0] = vmulq_f16(vtop.val[0], m0.val[0]);
-
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vtop.val[1], m0.val[1]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vtop.val[0], vtop_end, 1), m0.val[2]));
-
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vmid.val[0], m1.val[0]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vmid.val[1], m1.val[1]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vmid.val[0], vmid_end, 1), m1.val[2]));
-
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vlow.val[0], m2.val[0]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vlow.val[1], m2.val[1]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vlow.val[0], vlow_end, 1), m2.val[2]));
-
- accumulate ? accumulate_results<2>(out_ptr, out) : store_results<2>(out_ptr, out);
- }
- else
- {
- const float16x8x3_t vtop =
- {
- {
- vld1q_f16(in_top),
- vld1q_f16(in_top + 8),
- vld1q_f16(in_top + 16)
- }
- };
- const float16x8x3_t vmid =
- {
- {
- vld1q_f16(in_mid),
- vld1q_f16(in_mid + 8),
- vld1q_f16(in_mid + 16)
- }
- };
- const float16x8x3_t vlow =
- {
- {
- vld1q_f16(in_low),
- vld1q_f16(in_low + 8),
- vld1q_f16(in_low + 16)
- }
- };
- out.val[0] = vmulq_f16(vtop.val[0], m0.val[0]);
- out.val[1] = vmulq_f16(vtop.val[1], m0.val[0]);
-
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vtop.val[0], vtop.val[1], 1), m0.val[1]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vtop.val[0], vtop.val[1], 2), m0.val[2]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vmid.val[0], m1.val[0]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vmid.val[0], vmid.val[1], 1), m1.val[1]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vmid.val[0], vmid.val[1], 2), m1.val[2]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vlow.val[0], m2.val[0]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vlow.val[0], vlow.val[1], 1), m2.val[1]));
- out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vlow.val[0], vlow.val[1], 2), m2.val[2]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vtop.val[1], vtop.val[2], 1), m0.val[1]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vtop.val[1], vtop.val[2], 2), m0.val[2]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vmid.val[1], m1.val[0]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vmid.val[1], vmid.val[2], 1), m1.val[1]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vmid.val[1], vmid.val[2], 2), m1.val[2]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vlow.val[1], m2.val[0]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vlow.val[1], vlow.val[2], 1), m2.val[1]));
- out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vlow.val[1], vlow.val[2], 2), m2.val[2]));
-
- if(stridex == 3)
- {
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 3), out.val[0], 1);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 2);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 1), out.val[0], 3);
-
- accumulate ? accumulate_results<3>(out_ptr, out) : store_results<3>(out_ptr, out);
- }
- else
- {
- accumulate ? accumulate_results<1>(out_ptr, out) : store_results<1>(out_ptr, out);
- }
- }
-}
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-/** Get the number of input elements processed per iteration of a 3x3 convolution.
- *
- * @param[in] num_elems_written_per_iteration Number of output elements written per iteration of the 3x3 convolution.
- * @param[in] stridex Stride value in elements across x.
- *
- * @return The number of input elements processed.
- */
-inline int get_input_num_elems_processed(unsigned int num_elems_written_per_iteration, unsigned int stridex)
-{
- switch(stridex)
- {
- case 1:
- return num_elems_written_per_iteration;
- case 2:
- return num_elems_written_per_iteration << 1;
- case 3:
- return num_elems_written_per_iteration * 3;
- default:
- ARM_COMPUTE_ERROR("stridex not supported");
- return 0;
- }
-}
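-
-/* For example, writing 4 output elements per iteration consumes 4 input
- elements at stride 1, 8 at stride 2 and 12 at stride 3. */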
-} // namespace detail
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/abs.h b/arm_compute/core/NEON/wrapper/intrinsics/abs.h
deleted file mode 100644
index aff18166f5..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/abs.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_ABS_H
-#define ARM_COMPUTE_WRAPPER_ABS_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VABS_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vabs(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-#define VQABS_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vqabs(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-// Absolute: vabs{q}_<type>. Vd[i] = |Va[i]|
-VABS_IMPL(int8x8_t, int8x8_t, vabs, s8)
-VABS_IMPL(int16x4_t, int16x4_t, vabs, s16)
-VABS_IMPL(int32x2_t, int32x2_t, vabs, s32)
-VABS_IMPL(float32x2_t, float32x2_t, vabs, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VABS_IMPL(float16x4_t, float16x4_t, vabs, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VABS_IMPL(int8x16_t, int8x16_t, vabsq, s8)
-VABS_IMPL(int16x8_t, int16x8_t, vabsq, s16)
-VABS_IMPL(int32x4_t, int32x4_t, vabsq, s32)
-VABS_IMPL(float32x4_t, float32x4_t, vabsq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VABS_IMPL(float16x8_t, float16x8_t, vabsq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-// Saturating absolute: vqabs{q}_<type>. Vd[i] = sat(|Va[i]|)
-VQABS_IMPL(int8x8_t, int8x8_t, vqabs, s8)
-VQABS_IMPL(int16x4_t, int16x4_t, vqabs, s16)
-VQABS_IMPL(int32x2_t, int32x2_t, vqabs, s32)
-
-VQABS_IMPL(int8x16_t, int8x16_t, vqabsq, s8)
-VQABS_IMPL(int16x8_t, int16x8_t, vqabsq, s16)
-VQABS_IMPL(int32x4_t, int32x4_t, vqabsq, s32)
-
-#undef VABS_IMPL
-#undef VQABS_IMPL
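-
-// Illustrative use (a sketch): the overload set lets templated kernels stay
-// type-generic, e.g.
-//   float32x4_t v = vdupq_n_f32(-1.f);
-//   float32x4_t r = wrapper::vabs(v); // resolves to vabsq_f32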
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_ABS_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/add.h b/arm_compute/core/NEON/wrapper/intrinsics/add.h
deleted file mode 100644
index 776e136a56..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/add.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_ADD_H
-#define ARM_COMPUTE_WRAPPER_ADD_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VADD_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vadd(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VADD_IMPL(uint8x8_t, uint8x8_t, vadd, u8)
-VADD_IMPL(int8x8_t, int8x8_t, vadd, s8)
-VADD_IMPL(uint16x4_t, uint16x4_t, vadd, u16)
-VADD_IMPL(int16x4_t, int16x4_t, vadd, s16)
-VADD_IMPL(uint32x2_t, uint32x2_t, vadd, u32)
-VADD_IMPL(int32x2_t, int32x2_t, vadd, s32)
-VADD_IMPL(uint64x1_t, uint64x1_t, vadd, u64)
-VADD_IMPL(int64x1_t, int64x1_t, vadd, s64)
-VADD_IMPL(float32x2_t, float32x2_t, vadd, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VADD_IMPL(float16x4_t, float16x4_t, vadd, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VADD_IMPL(uint8x16_t, uint8x16_t, vaddq, u8)
-VADD_IMPL(int8x16_t, int8x16_t, vaddq, s8)
-VADD_IMPL(uint16x8_t, uint16x8_t, vaddq, u16)
-VADD_IMPL(int16x8_t, int16x8_t, vaddq, s16)
-VADD_IMPL(uint32x4_t, uint32x4_t, vaddq, u32)
-VADD_IMPL(int32x4_t, int32x4_t, vaddq, s32)
-VADD_IMPL(uint64x2_t, uint64x2_t, vaddq, u64)
-VADD_IMPL(int64x2_t, int64x2_t, vaddq, s64)
-VADD_IMPL(float32x4_t, float32x4_t, vaddq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VADD_IMPL(float16x8_t, float16x8_t, vaddq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#undef VADD_IMPL
-
-// VQADD: Vector saturating add (No notion of saturation for floating point)
-#define VQADD_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vqadd(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VQADD_IMPL(uint8x8_t, uint8x8_t, vqadd, u8)
-VQADD_IMPL(int8x8_t, int8x8_t, vqadd, s8)
-VQADD_IMPL(uint16x4_t, uint16x4_t, vqadd, u16)
-VQADD_IMPL(int16x4_t, int16x4_t, vqadd, s16)
-VQADD_IMPL(uint32x2_t, uint32x2_t, vqadd, u32)
-VQADD_IMPL(int32x2_t, int32x2_t, vqadd, s32)
-VQADD_IMPL(uint64x1_t, uint64x1_t, vqadd, u64)
-VQADD_IMPL(int64x1_t, int64x1_t, vqadd, s64)
-VQADD_IMPL(float32x2_t, float32x2_t, vadd, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VQADD_IMPL(float16x4_t, float16x4_t, vadd, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VQADD_IMPL(uint8x16_t, uint8x16_t, vqaddq, u8)
-VQADD_IMPL(int8x16_t, int8x16_t, vqaddq, s8)
-VQADD_IMPL(uint16x8_t, uint16x8_t, vqaddq, u16)
-VQADD_IMPL(int16x8_t, int16x8_t, vqaddq, s16)
-VQADD_IMPL(uint32x4_t, uint32x4_t, vqaddq, u32)
-VQADD_IMPL(int32x4_t, int32x4_t, vqaddq, s32)
-VQADD_IMPL(uint64x2_t, uint64x2_t, vqaddq, u64)
-VQADD_IMPL(int64x2_t, int64x2_t, vqaddq, s64)
-VQADD_IMPL(float32x4_t, float32x4_t, vaddq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VQADD_IMPL(float16x8_t, float16x8_t, vaddq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#undef VQADD_IMPL
-
-// VADDW: Vector widening add
-#define VADDW_IMPL(wtype, vtype, prefix, postfix) \
- inline wtype vaddw(const wtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VADDW_IMPL(uint16x8_t, uint8x8_t, vaddw, u8)
-VADDW_IMPL(int16x8_t, int8x8_t, vaddw, s8)
-VADDW_IMPL(uint32x4_t, uint16x4_t, vaddw, u16)
-VADDW_IMPL(int32x4_t, int16x4_t, vaddw, s16)
-VADDW_IMPL(uint64x2_t, uint32x2_t, vaddw, u32)
-VADDW_IMPL(int64x2_t, int32x2_t, vaddw, s32)
-#undef VADDW_IMPL
-
-// VADDL: Vector long add
-#define VADDL_IMPL(wtype, vtype, prefix, postfix) \
- inline wtype vaddl(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VADDL_IMPL(uint16x8_t, uint8x8_t, vaddl, u8)
-VADDL_IMPL(int16x8_t, int8x8_t, vaddl, s8)
-VADDL_IMPL(uint32x4_t, uint16x4_t, vaddl, u16)
-VADDL_IMPL(int32x4_t, int16x4_t, vaddl, s16)
-VADDL_IMPL(uint64x2_t, uint32x2_t, vaddl, u32)
-VADDL_IMPL(int64x2_t, int32x2_t, vaddl, s32)
-#undef VADDL_IMPL
-
-#if defined(__aarch64__)
-// VADDV: Across vector add
-#define VADDV_IMPL(stype, vtype, prefix, postfix) \
- inline stype vaddv(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VADDV_IMPL(uint8_t, uint8x8_t, vaddv, u8)
-VADDV_IMPL(int8_t, int8x8_t, vaddv, s8)
-VADDV_IMPL(uint16_t, uint16x4_t, vaddv, u16)
-VADDV_IMPL(int16_t, int16x4_t, vaddv, s16)
-VADDV_IMPL(uint32_t, uint32x2_t, vaddv, u32)
-VADDV_IMPL(int32_t, int32x2_t, vaddv, s32)
-VADDV_IMPL(float, float32x2_t, vaddv, f32)
-
-VADDV_IMPL(uint8_t, uint8x16_t, vaddvq, u8)
-VADDV_IMPL(int8_t, int8x16_t, vaddvq, s8)
-VADDV_IMPL(uint16_t, uint16x8_t, vaddvq, u16)
-VADDV_IMPL(int16_t, int16x8_t, vaddvq, s16)
-VADDV_IMPL(uint32_t, uint32x4_t, vaddvq, u32)
-VADDV_IMPL(int32_t, int32x4_t, vaddvq, s32)
-VADDV_IMPL(uint64_t, uint64x2_t, vaddvq, u64)
-VADDV_IMPL(int64_t, int64x2_t, vaddvq, s64)
-VADDV_IMPL(float, float32x4_t, vaddvq, f32)
-#undef VADDV_IMPL
-#endif // defined(__aarch64__)
-
-// VPADDL: Signed add long pairwise
-#define VPADDL_IMPL(ltype, vtype, prefix, postfix) \
- inline ltype vpaddl(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VPADDL_IMPL(uint16x4_t, uint8x8_t, vpaddl, u8)
-VPADDL_IMPL(int16x4_t, int8x8_t, vpaddl, s8)
-VPADDL_IMPL(uint32x2_t, uint16x4_t, vpaddl, u16)
-VPADDL_IMPL(int32x2_t, int16x4_t, vpaddl, s16)
-VPADDL_IMPL(uint64x1_t, uint32x2_t, vpaddl, u32)
-VPADDL_IMPL(int64x1_t, int32x2_t, vpaddl, s32)
-
-VPADDL_IMPL(uint16x8_t, uint8x16_t, vpaddlq, u8)
-VPADDL_IMPL(int16x8_t, int8x16_t, vpaddlq, s8)
-VPADDL_IMPL(uint32x4_t, uint16x8_t, vpaddlq, u16)
-VPADDL_IMPL(int32x4_t, int16x8_t, vpaddlq, s16)
-VPADDL_IMPL(uint64x2_t, uint32x4_t, vpaddlq, u32)
-VPADDL_IMPL(int64x2_t, int32x4_t, vpaddlq, s32)
-#undef VPADDL_IMPL
-
-// VPADD: Add pairwise
-#define VPADD_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vpadd(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VPADD_IMPL(uint8x8_t, uint8x8_t, vpadd, u8)
-VPADD_IMPL(int8x8_t, int8x8_t, vpadd, s8)
-VPADD_IMPL(uint16x4_t, uint16x4_t, vpadd, u16)
-VPADD_IMPL(int16x4_t, int16x4_t, vpadd, s16)
-VPADD_IMPL(uint32x2_t, uint32x2_t, vpadd, u32)
-VPADD_IMPL(int32x2_t, int32x2_t, vpadd, s32)
-VPADD_IMPL(float32x2_t, float32x2_t, vpadd, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VPADD_IMPL(float16x4_t, float16x4_t, vpadd, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VPADD_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_ADD_H */
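
The add.h family above layers plain (vadd), saturating (vqadd), widening (vaddl/vaddw), pairwise (vpadd/vpaddl) and across-vector (vaddv, AArch64 only) additions behind one overload set. A minimal usage sketch (illustrative, not repository code), assuming the pre-move include path shown in this diff; the helper names are invented:

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h" // umbrella, pulls add/getlow/getlane/...

    using namespace arm_compute;

    // Saturating add: clamps at 255 instead of wrapping.
    inline uint8x8_t sat_add(uint8x8_t a, uint8x8_t b)
    {
        return wrapper::vqadd(a, b);
    }

    // Overflow-safe total of all 16 input bytes: widen first, then reduce.
    inline uint32_t sum_all(uint8x8_t a, uint8x8_t b)
    {
        const uint16x8_t wide = wrapper::vaddl(a, b);  // u8 + u8 -> u16 lanes
        const uint32x4_t acc  = wrapper::vpaddl(wide); // pairwise widen u16 -> u32
    #if defined(__aarch64__)
        return wrapper::vaddv(acc);                    // across-vector reduction
    #else
        // 32-bit fallback built from the sibling headers deleted below.
        const uint32x2_t p = wrapper::vpadd(wrapper::vgetlow(acc), wrapper::vgethigh(acc));
        return wrapper::vgetlane(wrapper::vpadd(p, p), 0);
    #endif
    }
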
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/and.h b/arm_compute/core/NEON/wrapper/intrinsics/and.h
deleted file mode 100644
index 1973c5593d..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/and.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_AND_H
-#define ARM_COMPUTE_WRAPPER_AND_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VAND_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vand(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VAND_IMPL(uint8_t, uint8x8_t, vand, u8)
-VAND_IMPL(int8_t, int8x8_t, vand, s8)
-VAND_IMPL(uint16_t, uint16x4_t, vand, u16)
-VAND_IMPL(int16_t, int16x4_t, vand, s16)
-VAND_IMPL(uint32_t, uint32x2_t, vand, u32)
-VAND_IMPL(int32_t, int32x2_t, vand, s32)
-VAND_IMPL(uint64_t, uint64x1_t, vand, u64)
-VAND_IMPL(int64_t, int64x1_t, vand, s64)
-
-VAND_IMPL(uint8_t, uint8x16_t, vandq, u8)
-VAND_IMPL(int8_t, int8x16_t, vandq, s8)
-VAND_IMPL(uint16_t, uint16x8_t, vandq, u16)
-VAND_IMPL(int16_t, int16x8_t, vandq, s16)
-VAND_IMPL(uint32_t, uint32x4_t, vandq, u32)
-VAND_IMPL(int32_t, int32x4_t, vandq, s32)
-VAND_IMPL(uint64_t, uint64x2_t, vandq, u64)
-VAND_IMPL(int64_t, int64x2_t, vandq, s64)
-
-#undef VAND_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_AND_H */
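
vand pairs naturally with veor from the eor.h hunk further down; together they cover the usual mask algebra. A small illustrative sketch (the helper name is invented for this example):

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/and.h"
    #include "arm_compute/core/NEON/wrapper/intrinsics/eor.h"

    using namespace arm_compute;

    // (x & mask) ^ mask: keep only the bits under `mask`, then flip them.
    inline uint8x16_t masked_not(uint8x16_t x, uint8x16_t mask)
    {
        return wrapper::veor(wrapper::vand(x, mask), mask);
    }
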
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/bsl.h b/arm_compute/core/NEON/wrapper/intrinsics/bsl.h
deleted file mode 100644
index 3c26a9c786..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/bsl.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_BSL_H
-#define ARM_COMPUTE_WRAPPER_BSL_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VBSL_IMPL(stype, vtype, ctype, prefix, postfix) \
- inline vtype vbsl(const ctype &a, const vtype &b, const vtype &c) \
- { \
- return prefix##_##postfix(a, b, c); \
- }
-
-VBSL_IMPL(uint8_t, uint8x8_t, uint8x8_t, vbsl, u8)
-VBSL_IMPL(int8_t, int8x8_t, uint8x8_t, vbsl, s8)
-VBSL_IMPL(uint16_t, uint16x4_t, uint16x4_t, vbsl, u16)
-VBSL_IMPL(int16_t, int16x4_t, uint16x4_t, vbsl, s16)
-VBSL_IMPL(uint32_t, uint32x2_t, uint32x2_t, vbsl, u32)
-VBSL_IMPL(int32_t, int32x2_t, uint32x2_t, vbsl, s32)
-VBSL_IMPL(float32x2_t, float32x2_t, uint32x2_t, vbsl, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBSL_IMPL(float16x4_t, float16x4_t, uint16x4_t, vbsl, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VBSL_IMPL(uint8_t, uint8x16_t, uint8x16_t, vbslq, u8)
-VBSL_IMPL(int8_t, int8x16_t, uint8x16_t, vbslq, s8)
-VBSL_IMPL(uint16_t, uint16x8_t, uint16x8_t, vbslq, u16)
-VBSL_IMPL(int16_t, int16x8_t, uint16x8_t, vbslq, s16)
-VBSL_IMPL(uint32_t, uint32x4_t, uint32x4_t, vbslq, u32)
-VBSL_IMPL(int32_t, int32x4_t, uint32x4_t, vbslq, s32)
-VBSL_IMPL(float32x4_t, float32x4_t, uint32x4_t, vbslq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBSL_IMPL(float16x8_t, float16x8_t, uint16x8_t, vbslq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VBSL_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_BSL_H */
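
vbsl is the lane-select primitive behind most branch-free NEON code: a comparison yields an all-ones/all-zeros mask per lane, and vbsl picks between two sources with it. A sketch of the canonical use (ReLU), pulling vcgt from the cgt.h hunk below; names are illustrative:

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/bsl.h"
    #include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h"

    using namespace arm_compute;

    inline float32x4_t relu(float32x4_t x)
    {
        const float32x4_t zero = vdupq_n_f32(0.f);
        const uint32x4_t  pos  = wrapper::vcgt(x, zero); // all-ones lanes where x > 0
        return wrapper::vbsl(pos, x, zero);              // x where set, 0 elsewhere
    }

The same shape works with vceq, vcge and vclt from the neighbouring hunks.
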
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/ceq.h b/arm_compute/core/NEON/wrapper/intrinsics/ceq.h
deleted file mode 100644
index f8a8f91f73..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/ceq.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_CEQ_H
-#define ARM_COMPUTE_WRAPPER_CEQ_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VCEQ_IMPL(votype, vtype, prefix, postfix) \
- inline votype vceq(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VCEQ_IMPL(uint8x8_t, uint8x8_t, vceq, u8)
-VCEQ_IMPL(uint8x8_t, int8x8_t, vceq, s8)
-VCEQ_IMPL(uint16x4_t, uint16x4_t, vceq, u16)
-VCEQ_IMPL(uint16x4_t, int16x4_t, vceq, s16)
-VCEQ_IMPL(uint32x2_t, uint32x2_t, vceq, u32)
-VCEQ_IMPL(uint32x2_t, int32x2_t, vceq, s32)
-VCEQ_IMPL(uint32x2_t, float32x2_t, vceq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCEQ_IMPL(uint16x4_t, float16x4_t, vceq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VCEQ_IMPL(uint8x16_t, uint8x16_t, vceqq, u8)
-VCEQ_IMPL(uint8x16_t, int8x16_t, vceqq, s8)
-VCEQ_IMPL(uint16x8_t, uint16x8_t, vceqq, u16)
-VCEQ_IMPL(uint16x8_t, int16x8_t, vceqq, s16)
-VCEQ_IMPL(uint32x4_t, uint32x4_t, vceqq, u32)
-VCEQ_IMPL(uint32x4_t, int32x4_t, vceqq, s32)
-VCEQ_IMPL(uint32x4_t, float32x4_t, vceqq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCEQ_IMPL(uint16x8_t, float16x8_t, vceqq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VCEQ_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_CEQ_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cge.h b/arm_compute/core/NEON/wrapper/intrinsics/cge.h
deleted file mode 100644
index bf231b8b46..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/cge.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_CGE_H
-#define ARM_COMPUTE_WRAPPER_CGE_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VCGE_IMPL(stype, vtype, rtype, prefix, postfix) \
- inline rtype vcge(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VCGE_IMPL(uint8_t, uint8x8_t, uint8x8_t, vcge, u8)
-VCGE_IMPL(int8_t, int8x8_t, uint8x8_t, vcge, s8)
-VCGE_IMPL(uint16_t, uint16x4_t, uint16x4_t, vcge, u16)
-VCGE_IMPL(int16_t, int16x4_t, uint16x4_t, vcge, s16)
-VCGE_IMPL(uint32_t, uint32x2_t, uint32x2_t, vcge, u32)
-VCGE_IMPL(int32_t, int32x2_t, uint32x2_t, vcge, s32)
-VCGE_IMPL(float32x2_t, float32x2_t, uint32x2_t, vcge, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGE_IMPL(float16x4_t, float16x4_t, uint16x4_t, vcge, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VCGE_IMPL(uint8_t, uint8x16_t, uint8x16_t, vcgeq, u8)
-VCGE_IMPL(int8_t, int8x16_t, uint8x16_t, vcgeq, s8)
-VCGE_IMPL(uint16_t, uint16x8_t, uint16x8_t, vcgeq, u16)
-VCGE_IMPL(int16_t, int16x8_t, uint16x8_t, vcgeq, s16)
-VCGE_IMPL(uint32_t, uint32x4_t, uint32x4_t, vcgeq, u32)
-VCGE_IMPL(int32_t, int32x4_t, uint32x4_t, vcgeq, s32)
-VCGE_IMPL(float32x4_t, float32x4_t, uint32x4_t, vcgeq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGE_IMPL(float16x8_t, float16x8_t, uint16x8_t, vcgeq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VCGE_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_CGE_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cgt.h b/arm_compute/core/NEON/wrapper/intrinsics/cgt.h
deleted file mode 100644
index 5202a5b21d..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/cgt.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_CGT_H
-#define ARM_COMPUTE_WRAPPER_CGT_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VCGT_IMPL(rtype, vtype, prefix, postfix) \
- inline rtype vcgt(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VCGT_IMPL(uint8x8_t, uint8x8_t, vcgt, u8)
-VCGT_IMPL(uint8x8_t, int8x8_t, vcgt, s8)
-VCGT_IMPL(uint16x4_t, uint16x4_t, vcgt, u16)
-VCGT_IMPL(uint16x4_t, int16x4_t, vcgt, s16)
-VCGT_IMPL(uint32x2_t, uint32x2_t, vcgt, u32)
-VCGT_IMPL(uint32x2_t, int32x2_t, vcgt, s32)
-VCGT_IMPL(uint32x2_t, float32x2_t, vcgt, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(uint16x4_t, float16x4_t, vcgt, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VCGT_IMPL(uint8x16_t, uint8x16_t, vcgtq, u8)
-VCGT_IMPL(uint8x16_t, int8x16_t, vcgtq, s8)
-VCGT_IMPL(uint16x8_t, uint16x8_t, vcgtq, u16)
-VCGT_IMPL(uint16x8_t, int16x8_t, vcgtq, s16)
-VCGT_IMPL(uint32x4_t, uint32x4_t, vcgtq, u32)
-VCGT_IMPL(uint32x4_t, int32x4_t, vcgtq, s32)
-VCGT_IMPL(uint32x4_t, float32x4_t, vcgtq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(uint16x8_t, float16x8_t, vcgtq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VCGT_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_CGT_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/clt.h b/arm_compute/core/NEON/wrapper/intrinsics/clt.h
deleted file mode 100644
index 4701ab7026..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/clt.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_CLT_H
-#define ARM_COMPUTE_WRAPPER_CLT_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VCLT_IMPL(votype, vtype, prefix, postfix) \
- inline votype vclt(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VCLT_IMPL(uint8x8_t, uint8x8_t, vclt, u8)
-VCLT_IMPL(uint8x8_t, int8x8_t, vclt, s8)
-VCLT_IMPL(uint16x4_t, uint16x4_t, vclt, u16)
-VCLT_IMPL(uint16x4_t, int16x4_t, vclt, s16)
-VCLT_IMPL(uint32x2_t, uint32x2_t, vclt, u32)
-VCLT_IMPL(uint32x2_t, int32x2_t, vclt, s32)
-VCLT_IMPL(uint32x2_t, float32x2_t, vclt, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCLT_IMPL(uint16x4_t, float16x4_t, vclt, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VCLT_IMPL(uint8x16_t, uint8x16_t, vcltq, u8)
-VCLT_IMPL(uint8x16_t, int8x16_t, vcltq, s8)
-VCLT_IMPL(uint16x8_t, uint16x8_t, vcltq, u16)
-VCLT_IMPL(uint16x8_t, int16x8_t, vcltq, s16)
-VCLT_IMPL(uint32x4_t, uint32x4_t, vcltq, u32)
-VCLT_IMPL(uint32x4_t, int32x4_t, vcltq, s32)
-VCLT_IMPL(uint32x4_t, float32x4_t, vcltq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCLT_IMPL(uint16x8_t, float16x8_t, vcltq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VCLT_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_CLT_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/combine.h b/arm_compute/core/NEON/wrapper/intrinsics/combine.h
deleted file mode 100644
index 9099e28fc4..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/combine.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_COMBINE_H
-#define ARM_COMPUTE_WRAPPER_COMBINE_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VCOMBINE_IMPL(rtype, vtype, prefix, postfix) \
- inline rtype vcombine(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VCOMBINE_IMPL(uint8x16_t, uint8x8_t, vcombine, u8)
-VCOMBINE_IMPL(int8x16_t, int8x8_t, vcombine, s8)
-VCOMBINE_IMPL(uint16x8_t, uint16x4_t, vcombine, u16)
-VCOMBINE_IMPL(int16x8_t, int16x4_t, vcombine, s16)
-VCOMBINE_IMPL(uint32x4_t, uint32x2_t, vcombine, u32)
-VCOMBINE_IMPL(int32x4_t, int32x2_t, vcombine, s32)
-VCOMBINE_IMPL(float32x4_t, float32x2_t, vcombine, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCOMBINE_IMPL(float16x8_t, float16x4_t, vcombine, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VCOMBINE_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_COMBINE_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
deleted file mode 100644
index 5ea9a5dedd..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_CVT_H
-#define ARM_COMPUTE_WRAPPER_CVT_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VCVT_TO_F32_IMPL(ptype, vtype, prefix, postfix1, postfix2) \
- template <typename T> \
- inline typename std::enable_if<std::is_same<T, float>::value, float32x4_t>::type \
- vcvt(const vtype &a) \
- { \
- return prefix##_##postfix1##_##postfix2(a); \
- }
-
-VCVT_TO_F32_IMPL(float32x4_t, uint32x4_t, vcvtq, f32, u32)
-VCVT_TO_F32_IMPL(float32x4_t, int32x4_t, vcvtq, f32, s32)
-#undef VCVT_TO_F32_IMPL
-
-template <typename T>
-inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint32x4_t>::type
-vcvt(const float32x4_t &a)
-{
- return vcvtq_u32_f32(a);
-}
-
-template <typename T>
-inline typename std::enable_if<std::is_same<T, int8_t>::value, int32x4_t>::type
-vcvt(const float32x4_t &a)
-{
- return vcvtq_s32_f32(a);
-}
-
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
-/** Convert 2x128-bit floating point vectors into 1x128-bit bfloat16 vector
- *
- * @param[in] inptr Pointer to the input memory to load values from
- * @param[in,out] outptr Pointer to the output memory to store values to
- */
-inline void vcvt_bf16_f32(const float *inptr, uint16_t *outptr)
-{
- __asm __volatile(
- "ldp q0, q1, [%[inptr]]\n"
- ".inst 0xea16800\n" // BFCVTN v0, v0
- ".inst 0x4ea16820\n" // BFCVTN2 v0, v1
- "str q0, [%[outptr]]\n"
- : [inptr] "+r"(inptr)
- : [outptr] "r"(outptr)
- : "v0", "v1", "memory");
-}
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
-
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_CVT_H */
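
Note the convention in vcvt above: the template argument names the element family (float, uint8_t, int8_t) while the return type is always a 32-bit-lane vector, and the float-to-int direction truncates toward zero (vcvtq_s32_f32 semantics). A round-trip sketch with an invented helper name:

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/cvt.h"

    using namespace arm_compute;

    // s32 -> f32, scale, back to s32 (truncating).
    inline int32x4_t scale_s32(int32x4_t v, float32x4_t scale)
    {
        const float32x4_t f = wrapper::vcvt<float>(v);     // selects the s32 -> f32 overload
        return wrapper::vcvt<int8_t>(vmulq_f32(f, scale)); // int8_t tag, but returns int32x4_t
    }
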
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/div.h b/arm_compute/core/NEON/wrapper/intrinsics/div.h
deleted file mode 100644
index d49a9113b0..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/div.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_DIV_H
-#define ARM_COMPUTE_WRAPPER_DIV_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#ifdef __aarch64__
-
-#define VDIV_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vdiv(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-VDIV_IMPL(float32x2_t, float32x2_t, vdiv, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VDIV_IMPL(float16x4_t, float16x4_t, vdiv, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VDIV_IMPL(float32x4_t, float32x4_t, vdivq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VDIV_IMPL(float16x8_t, float16x8_t, vdivq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#else // __aarch64__
-
-#define VDIV_IMPL(stype, vtype, mul_prefix, inv_prefix, postfix) \
- inline vtype vdiv(const vtype &a, const vtype &b) \
- { \
- return mul_prefix##_##postfix(a, inv_prefix##_##postfix(b)); \
- }
-VDIV_IMPL(float32x2_t, float32x2_t, vmul, vinv, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VDIV_IMPL(float16x4_t, float16x4_t, vmul, vinv, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VDIV_IMPL(float32x4_t, float32x4_t, vmulq, vinvq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VDIV_IMPL(float16x8_t, float16x8_t, vmulq, vinvq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#endif // __aarch64__
-
-#undef VDIV_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_DIV_H */
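
The two VDIV_IMPL branches above are not bit-identical: AArch64 lowers to the hardware divide, while the 32-bit path rewrites a / b as a * vinv(b) using the reciprocal wrapper from inv.h below, so the two builds can differ by a few ULP. Usage is uniform either way; a one-line illustrative sketch:

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/div.h"

    using namespace arm_compute;

    inline float32x4_t mean4(float32x4_t sum, float32x4_t count)
    {
        return wrapper::vdiv(sum, count); // true IEEE divide on AArch64 only
    }
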
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h b/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h
deleted file mode 100644
index ffbfde72c5..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_DUP_N_H
-#define ARM_COMPUTE_WRAPPER_DUP_N_H
-
-#include "arm_compute/core/NEON/wrapper/traits.h"
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VDUP_N_IMPL(stype, vtype, prefix, postfix, tag) \
- inline vtype vdup_n(stype value, tag) \
- { \
- return prefix##_##postfix(value); \
- }
-
-VDUP_N_IMPL(uint8_t, uint8x8_t, vdup_n, u8, traits::vector_64_tag)
-VDUP_N_IMPL(int8_t, int8x8_t, vdup_n, s8, traits::vector_64_tag)
-VDUP_N_IMPL(uint16_t, uint16x4_t, vdup_n, u16, traits::vector_64_tag)
-VDUP_N_IMPL(int16_t, int16x4_t, vdup_n, s16, traits::vector_64_tag)
-VDUP_N_IMPL(uint32_t, uint32x2_t, vdup_n, u32, traits::vector_64_tag)
-VDUP_N_IMPL(int32_t, int32x2_t, vdup_n, s32, traits::vector_64_tag)
-VDUP_N_IMPL(float, float32x2_t, vdup_n, f32, traits::vector_64_tag)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VDUP_N_IMPL(float16_t, float16x4_t, vdup_n, f16, traits::vector_64_tag)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VDUP_N_IMPL(uint8_t, uint8x16_t, vdupq_n, u8, traits::vector_128_tag)
-VDUP_N_IMPL(int8_t, int8x16_t, vdupq_n, s8, traits::vector_128_tag)
-VDUP_N_IMPL(uint16_t, uint16x8_t, vdupq_n, u16, traits::vector_128_tag)
-VDUP_N_IMPL(int16_t, int16x8_t, vdupq_n, s16, traits::vector_128_tag)
-VDUP_N_IMPL(uint32_t, uint32x4_t, vdupq_n, u32, traits::vector_128_tag)
-VDUP_N_IMPL(int32_t, int32x4_t, vdupq_n, s32, traits::vector_128_tag)
-VDUP_N_IMPL(float, float32x4_t, vdupq_n, f32, traits::vector_128_tag)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VDUP_N_IMPL(float16_t, float16x8_t, vdupq_n, f16, traits::vector_128_tag)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VDUP_N_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_DUP_N_H */
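
The trailing tag argument is how vdup_n disambiguates the 64-bit and 128-bit registers that share one scalar type; the tag structs live in wrapper/traits.h (referenced by the include above but not part of this excerpt) and are passed as empty temporaries throughout the library. Illustrative sketch:

    #include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h"

    using namespace arm_compute::wrapper;

    inline void broadcast_demo()
    {
        const float32x2_t half = vdup_n(42.f, traits::vector_64_tag{});  // D register
        const float32x4_t full = vdup_n(42.f, traits::vector_128_tag{}); // Q register
        (void)half;
        (void)full;
    }
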
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/eor.h b/arm_compute/core/NEON/wrapper/intrinsics/eor.h
deleted file mode 100644
index a0e7b681ab..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/eor.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_EOR_H
-#define ARM_COMPUTE_WRAPPER_EOR_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VEOR_IMPL(vtype, prefix, postfix) \
- inline vtype veor(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VEOR_IMPL(uint8x8_t, veor, u8)
-VEOR_IMPL(int8x8_t, veor, s8)
-VEOR_IMPL(uint16x4_t, veor, u16)
-VEOR_IMPL(int16x4_t, veor, s16)
-VEOR_IMPL(uint32x2_t, veor, u32)
-VEOR_IMPL(int32x2_t, veor, s32)
-
-VEOR_IMPL(uint8x16_t, veorq, u8)
-VEOR_IMPL(int8x16_t, veorq, s8)
-VEOR_IMPL(uint16x8_t, veorq, u16)
-VEOR_IMPL(int16x8_t, veorq, s16)
-VEOR_IMPL(uint32x4_t, veorq, u32)
-VEOR_IMPL(int32x4_t, veorq, s32)
-
-#undef VEOR_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_EOR_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/exp.h b/arm_compute/core/NEON/wrapper/intrinsics/exp.h
deleted file mode 100644
index 4b17ebd93f..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/exp.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_EXP_H
-#define ARM_COMPUTE_WRAPPER_EXP_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VEXPQ_IMPL(vtype, postfix) \
- inline vtype vexpq(const vtype &a) \
- { \
- return vexpq_##postfix(a); \
- }
-
-#define VEXPQ_IMPL_INT(vtype, postfix) \
- inline vtype vexpq(const vtype &a) \
- { \
- ARM_COMPUTE_UNUSED(a); \
- ARM_COMPUTE_ERROR("Not supported"); \
- }
-
-VEXPQ_IMPL(float32x4_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VEXPQ_IMPL(float16x8_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VEXPQ_IMPL_INT(int32x4_t, s32)
-#undef VEXPQ_IMPL
-
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_EXP_H */
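
vexpq dispatches to the polynomial approximation in NEMath.h; the s32 overload exists only so fully generic code still compiles, and it aborts at runtime if ever reached. A short sketch over a 4-float buffer (helper name invented):

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/exp.h"

    using namespace arm_compute;

    inline void exp_inplace4(float *buf) // buf points at 4 floats
    {
        vst1q_f32(buf, wrapper::vexpq(vld1q_f32(buf)));
    }
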
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/ext.h b/arm_compute/core/NEON/wrapper/intrinsics/ext.h
deleted file mode 100644
index f2c3dcc901..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/ext.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_EXT_H
-#define ARM_COMPUTE_WRAPPER_EXT_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VEXT_IMPL(vtype, prefix, postfix, size) \
- inline vtype vext_##size(vtype value_a, vtype value_b) \
- { \
- return prefix##_##postfix(value_a, value_b, size); \
- }
-
-VEXT_IMPL(uint8x8_t, vext, u8, 1)
-VEXT_IMPL(uint8x8_t, vext, u8, 2)
-VEXT_IMPL(int8x8_t, vext, s8, 1)
-VEXT_IMPL(int8x8_t, vext, s8, 2)
-VEXT_IMPL(uint16x4_t, vext, u16, 1)
-VEXT_IMPL(uint16x4_t, vext, u16, 2)
-VEXT_IMPL(int16x4_t, vext, s16, 1)
-VEXT_IMPL(int16x4_t, vext, s16, 2)
-
-VEXT_IMPL(uint8x16_t, vextq, u8, 1)
-VEXT_IMPL(uint8x16_t, vextq, u8, 2)
-VEXT_IMPL(int8x16_t, vextq, s8, 1)
-VEXT_IMPL(int8x16_t, vextq, s8, 2)
-VEXT_IMPL(uint16x8_t, vextq, u16, 1)
-VEXT_IMPL(uint16x8_t, vextq, u16, 2)
-VEXT_IMPL(int16x8_t, vextq, s16, 1)
-VEXT_IMPL(int16x8_t, vextq, s16, 2)
-VEXT_IMPL(int32x4_t, vextq, s32, 1)
-VEXT_IMPL(int32x4_t, vextq, s32, 2)
-
-#undef VEXT_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_EXT_H */
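
vext_1/vext_2 splice two adjacent registers and are the usual way to build the overlapping windows a small convolution needs without extra loads. Sketch with invented names:

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/ext.h"

    using namespace arm_compute;

    // Overlapping windows for a 3-tap filter from just two loads.
    inline void windows3(const int16_t *in, int16x8_t &v1, int16x8_t &v2)
    {
        const int16x8_t v0 = vld1q_s16(in);     // elements 0..7
        const int16x8_t v8 = vld1q_s16(in + 8); // elements 8..15
        v1 = wrapper::vext_1(v0, v8);           // elements 1..8
        v2 = wrapper::vext_2(v0, v8);           // elements 2..9
    }
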
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h b/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h
deleted file mode 100644
index 13d29677a6..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_GET_HIGH_H
-#define ARM_COMPUTE_WRAPPER_GET_HIGH_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VGETHIGH_IMPL(half_vtype, vtype, postfix) \
- inline half_vtype vgethigh(const vtype val) \
- { \
- return vget_high_##postfix(val); \
- }
-
-VGETHIGH_IMPL(uint8x8_t, uint8x16_t, u8)
-VGETHIGH_IMPL(int8x8_t, int8x16_t, s8)
-VGETHIGH_IMPL(uint16x4_t, uint16x8_t, u16)
-VGETHIGH_IMPL(int16x4_t, int16x8_t, s16)
-VGETHIGH_IMPL(uint32x2_t, uint32x4_t, u32)
-VGETHIGH_IMPL(int32x2_t, int32x4_t, s32)
-VGETHIGH_IMPL(float32x2_t, float32x4_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VGETHIGH_IMPL(float16x4_t, float16x8_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VGETHIGH_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_GET_HIGH_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
deleted file mode 100644
index 533bf63603..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_GET_LANE_H
-#define ARM_COMPUTE_WRAPPER_GET_LANE_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VGETLANE_IMPL_8(stype, vtype, postfix) \
- inline stype vgetlane(const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vget_lane_##postfix(vector, 0); \
- case 1: \
- return vget_lane_##postfix(vector, 1); \
- case 2: \
- return vget_lane_##postfix(vector, 2); \
- case 3: \
- return vget_lane_##postfix(vector, 3); \
- case 4: \
- return vget_lane_##postfix(vector, 4); \
- case 5: \
- return vget_lane_##postfix(vector, 5); \
- case 6: \
- return vget_lane_##postfix(vector, 6); \
- case 7: \
- return vget_lane_##postfix(vector, 7); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VGETLANE_IMPL_4(stype, vtype, postfix) \
- inline stype vgetlane(const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vget_lane_##postfix(vector, 0); \
- case 1: \
- return vget_lane_##postfix(vector, 1); \
- case 2: \
- return vget_lane_##postfix(vector, 2); \
- case 3: \
- return vget_lane_##postfix(vector, 3); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VGETLANE_IMPL_2(stype, vtype, postfix) \
- inline stype vgetlane(const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vget_lane_##postfix(vector, 0); \
- case 1: \
- return vget_lane_##postfix(vector, 1); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-VGETLANE_IMPL_8(uint8_t, uint8x8_t, u8)
-VGETLANE_IMPL_8(int8_t, int8x8_t, s8)
-VGETLANE_IMPL_4(uint16_t, uint16x4_t, u16)
-VGETLANE_IMPL_4(int16_t, int16x4_t, s16)
-VGETLANE_IMPL_2(uint32_t, uint32x2_t, u32)
-VGETLANE_IMPL_2(int32_t, int32x2_t, s32)
-VGETLANE_IMPL_2(float, float32x2_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VGETLANE_IMPL_4(float16_t, float16x4_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#define VGETQLANE_IMPL_16(stype, vtype, postfix) \
- inline stype vgetlane(const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vgetq_lane_##postfix(vector, 0); \
- case 1: \
- return vgetq_lane_##postfix(vector, 1); \
- case 2: \
- return vgetq_lane_##postfix(vector, 2); \
- case 3: \
- return vgetq_lane_##postfix(vector, 3); \
- case 4: \
- return vgetq_lane_##postfix(vector, 4); \
- case 5: \
- return vgetq_lane_##postfix(vector, 5); \
- case 6: \
- return vgetq_lane_##postfix(vector, 6); \
- case 7: \
- return vgetq_lane_##postfix(vector, 7); \
- case 8: \
- return vgetq_lane_##postfix(vector, 8); \
- case 9: \
- return vgetq_lane_##postfix(vector, 9); \
- case 10: \
- return vgetq_lane_##postfix(vector, 10); \
- case 11: \
- return vgetq_lane_##postfix(vector, 11); \
- case 12: \
- return vgetq_lane_##postfix(vector, 12); \
- case 13: \
- return vgetq_lane_##postfix(vector, 13); \
- case 14: \
- return vgetq_lane_##postfix(vector, 14); \
- case 15: \
- return vgetq_lane_##postfix(vector, 15); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VGETQLANE_IMPL_8(stype, vtype, postfix) \
- inline stype vgetlane(const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vgetq_lane_##postfix(vector, 0); \
- case 1: \
- return vgetq_lane_##postfix(vector, 1); \
- case 2: \
- return vgetq_lane_##postfix(vector, 2); \
- case 3: \
- return vgetq_lane_##postfix(vector, 3); \
- case 4: \
- return vgetq_lane_##postfix(vector, 4); \
- case 5: \
- return vgetq_lane_##postfix(vector, 5); \
- case 6: \
- return vgetq_lane_##postfix(vector, 6); \
- case 7: \
- return vgetq_lane_##postfix(vector, 7); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VGETQLANE_IMPL_4(stype, vtype, postfix) \
- inline stype vgetlane(const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vgetq_lane_##postfix(vector, 0); \
- case 1: \
- return vgetq_lane_##postfix(vector, 1); \
- case 2: \
- return vgetq_lane_##postfix(vector, 2); \
- case 3: \
- return vgetq_lane_##postfix(vector, 3); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VGETQLANE_IMPL_2(stype, vtype, postfix) \
- inline stype vgetlane(const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vgetq_lane_##postfix(vector, 0); \
- case 1: \
- return vgetq_lane_##postfix(vector, 1); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-VGETQLANE_IMPL_16(uint8_t, uint8x16_t, u8)
-VGETQLANE_IMPL_16(int8_t, int8x16_t, s8)
-VGETQLANE_IMPL_8(uint16_t, uint16x8_t, u16)
-VGETQLANE_IMPL_8(int16_t, int16x8_t, s16)
-VGETQLANE_IMPL_4(uint32_t, uint32x4_t, u32)
-VGETQLANE_IMPL_4(int32_t, int32x4_t, s32)
-VGETQLANE_IMPL_4(float, float32x4_t, f32)
-VGETQLANE_IMPL_2(int64_t, int64x2_t, s64)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VGETQLANE_IMPL_8(float16_t, float16x8_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VGETLANE_IMPL_8
-#undef VGETLANE_IMPL_4
-#undef VGETLANE_IMPL_2
-
-#undef VGETQLANE_IMPL_16
-#undef VGETQLANE_IMPL_8
-#undef VGETQLANE_IMPL_4
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_GET_LANE_H */
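
NEON requires lane indices to be compile-time immediates; the switch ladders above trade a branch (typically a jump table) for a runtime-selectable lane, and fall into ARM_COMPUTE_ERROR on an out-of-range index. Illustrative sketch:

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h"

    using namespace arm_compute;

    inline float nth_lane(float32x4_t v, unsigned int lane) // lane is a runtime value
    {
        return wrapper::vgetlane(v, lane); // aborts if lane > 3
    }
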
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlow.h b/arm_compute/core/NEON/wrapper/intrinsics/getlow.h
deleted file mode 100644
index dbc3d869e1..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/getlow.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_GET_LOW_H
-#define ARM_COMPUTE_WRAPPER_GET_LOW_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VGETLOW_IMPL(half_vtype, vtype, postfix) \
- inline half_vtype vgetlow(const vtype val) \
- { \
- return vget_low_##postfix(val); \
- }
-
-VGETLOW_IMPL(uint8x8_t, uint8x16_t, u8)
-VGETLOW_IMPL(int8x8_t, int8x16_t, s8)
-VGETLOW_IMPL(uint16x4_t, uint16x8_t, u16)
-VGETLOW_IMPL(int16x4_t, int16x8_t, s16)
-VGETLOW_IMPL(uint32x2_t, uint32x4_t, u32)
-VGETLOW_IMPL(int32x2_t, int32x4_t, s32)
-VGETLOW_IMPL(float32x2_t, float32x4_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VGETLOW_IMPL(float16x4_t, float16x8_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VGETLOW_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_GET_LOW_H */
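
vgetlow/vgethigh from the hunks above and vcombine from the combine.h hunk earlier form the standard split-widen-recombine idiom. A sketch that adds a u16 bias to 16 u8 values with saturation; vqmovn_u16 is the raw NEON saturating narrow (its wrapper lives in movn.h, outside this excerpt), and the helper name is invented:

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

    using namespace arm_compute;

    inline uint8x16_t add_bias_sat(uint8x16_t x, uint16x8_t bias)
    {
        const uint16x8_t lo = wrapper::vaddw(bias, wrapper::vgetlow(x));  // widen low half
        const uint16x8_t hi = wrapper::vaddw(bias, wrapper::vgethigh(x)); // widen high half
        return wrapper::vcombine(vqmovn_u16(lo), vqmovn_u16(hi));         // narrow + rejoin
    }
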
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
deleted file mode 100644
index 1150daa073..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_INTRINSICS_H
-#define ARM_COMPUTE_WRAPPER_INTRINSICS_H
-
-#include "arm_compute/core/NEON/wrapper/intrinsics/abs.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/add.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/and.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/bsl.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/ceq.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/cge.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/clt.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/combine.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/cvt.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/div.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/eor.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/exp.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/ext.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/getlow.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/inv.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/load.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/log.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/max.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/min.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/mla.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/movl.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/movn.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/mul.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/neg.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/not.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/orr.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/pmax.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/pmin.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/pow.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/qmov.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/qmovun.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/rev64.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/round.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/setlane.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/sin.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/sub.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/tanh.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/tbl.h"
-
-#endif /* ARM_COMPUTE_WRAPPER_INTRINSICS_H */
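
This umbrella header is the point of the whole wrapper layer: every function above is a plain overload set, so a kernel written once as a template specializes itself by vector type. A hedged sketch of the pattern (all calls resolve through headers deleted in this commit; the function itself is illustrative):

    #include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

    using namespace arm_compute;

    // One definition serves uint8x16_t, int16x8_t, float32x4_t, ...
    template <typename V>
    V clamped_sum(const V &a, const V &b, const V &hi)
    {
        const auto sum  = wrapper::vqadd(a, b);   // saturating for ints, plain vadd for float
        const auto mask = wrapper::vcgt(sum, hi); // per-lane "sum > hi"
        return wrapper::vbsl(mask, hi, sum);      // clamp offending lanes to hi
    }
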
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/inv.h b/arm_compute/core/NEON/wrapper/intrinsics/inv.h
deleted file mode 100644
index 9da66baffa..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/inv.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_INV_H
-#define ARM_COMPUTE_WRAPPER_INV_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VINV_IMPL(vtype, prefix, postfix) \
- inline vtype vinv(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-#define VINV_IMPL_INT(vtype, prefix, postfix) \
- inline vtype vinv(const vtype &a) \
- { \
- ARM_COMPUTE_UNUSED(a); \
- ARM_COMPUTE_ERROR("Not supported"); \
- }
-
-VINV_IMPL(float32x2_t, vinv, f32)
-VINV_IMPL_INT(int32x2_t, vinv, s32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VINV_IMPL(float16x4_t, vinv, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VINV_IMPL(float32x4_t, vinvq, f32)
-VINV_IMPL_INT(int32x4_t, vinvq, s32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VINV_IMPL(float16x8_t, vinvq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VINV_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_INV_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h b/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h
deleted file mode 100644
index 77adcf7b8c..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_INVSQRT_H
-#define ARM_COMPUTE_WRAPPER_INVSQRT_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VINVSQRT_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vinvsqrt(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-#define VINVSQRT_IMPL_INT(stype, vtype, prefix, postfix) \
- inline vtype vinvsqrt(const vtype &a) \
- { \
- ARM_COMPUTE_UNUSED(a); \
- ARM_COMPUTE_ERROR("Not supported"); \
- }
-
-VINVSQRT_IMPL(float, float32x2_t, vinvsqrt, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VINVSQRT_IMPL(float16_t, float16x4_t, vinvsqrt, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VINVSQRT_IMPL_INT(int, int32x4_t, vinvsqrt, s32)
-
-VINVSQRT_IMPL(float, float32x4_t, vinvsqrtq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VINVSQRT_IMPL(float16_t, float16x8_t, vinvsqrtq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VINVSQRT_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_INVSQRT_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/load.h b/arm_compute/core/NEON/wrapper/intrinsics/load.h
deleted file mode 100644
index d38350f05b..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/load.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_LOAD_H
-#define ARM_COMPUTE_WRAPPER_LOAD_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VLOAD_IMPL(stype, vtype, postfix) \
- inline vtype vload(const stype *ptr) \
- { \
- return vld1_##postfix(ptr); \
- }
-
-VLOAD_IMPL(uint8_t, uint8x8_t, u8)
-VLOAD_IMPL(int8_t, int8x8_t, s8)
-VLOAD_IMPL(uint16_t, uint16x4_t, u16)
-VLOAD_IMPL(int16_t, int16x4_t, s16)
-VLOAD_IMPL(uint32_t, uint32x2_t, u32)
-VLOAD_IMPL(int32_t, int32x2_t, s32)
-//VLOAD_IMPL(uint64_t, uint64x1_t, u64)
-//VLOAD_IMPL(int64_t, int64x1_t, s64)
-VLOAD_IMPL(float, float32x2_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VLOAD_IMPL(float16_t, float16x4_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#define VLOADQ_IMPL(stype, vtype, postfix) \
- inline vtype vloadq(const stype *ptr) \
- { \
- return vld1q_##postfix(ptr); \
- }
-
-VLOADQ_IMPL(uint8_t, uint8x16_t, u8)
-VLOADQ_IMPL(int8_t, int8x16_t, s8)
-VLOADQ_IMPL(uint16_t, uint16x8_t, u16)
-VLOADQ_IMPL(int16_t, int16x8_t, s16)
-VLOADQ_IMPL(uint32_t, uint32x4_t, u32)
-VLOADQ_IMPL(int32_t, int32x4_t, s32)
-//VLOADQ_IMPL(uint64_t, uint64x2_t, u64)
-//VLOADQ_IMPL(int64_t, int64x2_t, s64)
-VLOADQ_IMPL(float, float32x4_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VLOADQ_IMPL(float16_t, float16x8_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#undef VLOAD_IMPL
-#undef VLOADQ_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_LOAD_H */
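A sketch of how the removed load wrappers were typically consumed (helper name load16 is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/load.h"
    #include <arm_neon.h>
    #include <cstdint>

    uint8x16_t load16(const uint8_t *ptr)
    {
        // vloadq maps to vld1q_u8 from the pointer's element type;
        // vload(ptr) would select the 64-bit vld1_u8 variant instead.
        return arm_compute::wrapper::vloadq(ptr);
    }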
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/log.h b/arm_compute/core/NEON/wrapper/intrinsics/log.h
deleted file mode 100644
index 682830c122..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/log.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_LOG_H
-#define ARM_COMPUTE_WRAPPER_LOG_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VLOG_IMPL(vtype, prefix, postfix) \
- inline vtype vlog(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-#define VLOG_IMPL_INT(vtype, prefix, postfix) \
- inline vtype vlog(const vtype &a) \
- { \
- ARM_COMPUTE_UNUSED(a); \
- ARM_COMPUTE_ERROR("Not supported"); \
- }
-
-VLOG_IMPL(float32x4_t, vlogq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VLOG_IMPL(float16x8_t, vlogq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VLOG_IMPL_INT(int32x4_t, vlogq, s32)
-
-#undef VLOG_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_LOG_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/max.h b/arm_compute/core/NEON/wrapper/intrinsics/max.h
deleted file mode 100644
index a87b7a32b5..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/max.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_MAX_H
-#define ARM_COMPUTE_WRAPPER_MAX_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VMAX_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vmax(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VMAX_IMPL(uint8_t, uint8x8_t, vmax, u8)
-VMAX_IMPL(int8_t, int8x8_t, vmax, s8)
-VMAX_IMPL(uint16_t, uint16x4_t, vmax, u16)
-VMAX_IMPL(int16_t, int16x4_t, vmax, s16)
-VMAX_IMPL(uint32_t, uint32x2_t, vmax, u32)
-VMAX_IMPL(int32_t, int32x2_t, vmax, s32)
-VMAX_IMPL(float, float32x2_t, vmax, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMAX_IMPL(float16_t, float16x4_t, vmax, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VMAX_IMPL(uint8_t, uint8x16_t, vmaxq, u8)
-VMAX_IMPL(int8_t, int8x16_t, vmaxq, s8)
-VMAX_IMPL(uint16_t, uint16x8_t, vmaxq, u16)
-VMAX_IMPL(int16_t, int16x8_t, vmaxq, s16)
-VMAX_IMPL(uint32_t, uint32x4_t, vmaxq, u32)
-VMAX_IMPL(int32_t, int32x4_t, vmaxq, s32)
-VMAX_IMPL(float, float32x4_t, vmaxq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMAX_IMPL(float16_t, float16x8_t, vmaxq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VMAX_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_MAX_H */
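The value of these wrappers is that kernel code can be written once per operation rather than once per element type. A hedged sketch (template name clamp_lo is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/max.h"
    #include <arm_neon.h>

    template <typename V>
    V clamp_lo(V x, V lo)
    {
        // The same body instantiates for u8/s8/.../f32 vectors (and f16
        // when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined); overload
        // resolution maps V to the matching vmax/vmaxq intrinsic.
        return arm_compute::wrapper::vmax(x, lo);
    }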
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/min.h b/arm_compute/core/NEON/wrapper/intrinsics/min.h
deleted file mode 100644
index dc8a127e82..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/min.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_MIN_H
-#define ARM_COMPUTE_WRAPPER_MIN_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VMIN_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vmin(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VMIN_IMPL(uint8_t, uint8x8_t, vmin, u8)
-VMIN_IMPL(int8_t, int8x8_t, vmin, s8)
-VMIN_IMPL(uint16_t, uint16x4_t, vmin, u16)
-VMIN_IMPL(int16_t, int16x4_t, vmin, s16)
-VMIN_IMPL(uint32_t, uint32x2_t, vmin, u32)
-VMIN_IMPL(int32_t, int32x2_t, vmin, s32)
-VMIN_IMPL(float, float32x2_t, vmin, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMIN_IMPL(float16_t, float16x4_t, vmin, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VMIN_IMPL(uint8_t, uint8x16_t, vminq, u8)
-VMIN_IMPL(int8_t, int8x16_t, vminq, s8)
-VMIN_IMPL(uint16_t, uint16x8_t, vminq, u16)
-VMIN_IMPL(int16_t, int16x8_t, vminq, s16)
-VMIN_IMPL(uint32_t, uint32x4_t, vminq, u32)
-VMIN_IMPL(int32_t, int32x4_t, vminq, s32)
-VMIN_IMPL(float, float32x4_t, vminq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMIN_IMPL(float16_t, float16x8_t, vminq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VMIN_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_MIN_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mla.h b/arm_compute/core/NEON/wrapper/intrinsics/mla.h
deleted file mode 100644
index dd2f0c0d9d..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/mla.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_MLA_H
-#define ARM_COMPUTE_WRAPPER_MLA_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VMLA_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vmla(const vtype &a, const vtype &b, const vtype &c) \
- { \
- return prefix##_##postfix(a, b, c); \
- }
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#define VMLA_IMPL2(stype, vtype, prefix1, prefix2, postfix) \
- inline vtype vmla(const vtype &a, const vtype &b, const vtype &c) \
- { \
- return prefix1##_##postfix(a, prefix2##_##postfix(b, c)); \
- }
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VMLA_IMPL(uint8x8_t, uint8x8_t, vmla, u8)
-VMLA_IMPL(int8x8_t, int8x8_t, vmla, s8)
-VMLA_IMPL(uint16x4_t, uint16x4_t, vmla, u16)
-VMLA_IMPL(int16x4_t, int16x4_t, vmla, s16)
-VMLA_IMPL(uint32x2_t, uint32x2_t, vmla, u32)
-VMLA_IMPL(int32x2_t, int32x2_t, vmla, s32)
-VMLA_IMPL(float32x2_t, float32x2_t, vmla, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMLA_IMPL2(float16x4_t, float16x4_t, vadd, vmul, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VMLA_IMPL(uint8x16_t, uint8x16_t, vmlaq, u8)
-VMLA_IMPL(int8x16_t, int8x16_t, vmlaq, s8)
-VMLA_IMPL(uint16x8_t, uint16x8_t, vmlaq, u16)
-VMLA_IMPL(int16x8_t, int16x8_t, vmlaq, s16)
-VMLA_IMPL(uint32x4_t, uint32x4_t, vmlaq, u32)
-VMLA_IMPL(int32x4_t, int32x4_t, vmlaq, s32)
-VMLA_IMPL(float32x4_t, float32x4_t, vmlaq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VMLA_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_MLA_H */
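Note the f16 paths above: with no native vmla intrinsic for half floats, VMLA_IMPL2 synthesises the operation as vadd(a, vmul(b, c)). A usage sketch (function name axpy is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/mla.h"
    #include <arm_neon.h>

    float32x4_t axpy(float32x4_t acc, float32x4_t a, float32x4_t x)
    {
        // Computes acc + a * x per lane; f32 maps directly to vmlaq_f32.
        return arm_compute::wrapper::vmla(acc, a, x);
    }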
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/movl.h b/arm_compute/core/NEON/wrapper/intrinsics/movl.h
deleted file mode 100644
index 982a795924..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/movl.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_MOVL_H
-#define ARM_COMPUTE_WRAPPER_MOVL_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VMOVL_IMPL(ptype, vtype, prefix, postfix) \
- inline ptype vmovl(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VMOVL_IMPL(uint16x8_t, uint8x8_t, vmovl, u8)
-VMOVL_IMPL(int16x8_t, int8x8_t, vmovl, s8)
-VMOVL_IMPL(uint32x4_t, uint16x4_t, vmovl, u16)
-VMOVL_IMPL(int32x4_t, int16x4_t, vmovl, s16)
-VMOVL_IMPL(uint64x2_t, uint32x2_t, vmovl, u32)
-VMOVL_IMPL(int64x2_t, int32x2_t, vmovl, s32)
-
-#undef VMOVL_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_MOVL_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/movn.h b/arm_compute/core/NEON/wrapper/intrinsics/movn.h
deleted file mode 100644
index 23360e2597..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/movn.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_MOVN_H
-#define ARM_COMPUTE_WRAPPER_MOVN_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VMOVN_IMPL(dtype, vtype, prefix, postfix) \
- inline dtype vmovn(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VMOVN_IMPL(uint32x2_t, uint64x2_t, vmovn, u64)
-VMOVN_IMPL(int32x2_t, int64x2_t, vmovn, s64)
-VMOVN_IMPL(uint16x4_t, uint32x4_t, vmovn, u32)
-VMOVN_IMPL(int16x4_t, int32x4_t, vmovn, s32)
-VMOVN_IMPL(uint8x8_t, uint16x8_t, vmovn, u16)
-VMOVN_IMPL(int8x8_t, int16x8_t, vmovn, s16)
-
-#define VQMOVN_IMPL(dtype, vtype, prefix, postfix) \
- inline dtype vqmovn(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VQMOVN_IMPL(uint32x2_t, uint64x2_t, vqmovn, u64)
-VQMOVN_IMPL(int32x2_t, int64x2_t, vqmovn, s64)
-VQMOVN_IMPL(uint16x4_t, uint32x4_t, vqmovn, u32)
-VQMOVN_IMPL(int16x4_t, int32x4_t, vqmovn, s32)
-VQMOVN_IMPL(uint8x8_t, uint16x8_t, vqmovn, u16)
-VQMOVN_IMPL(int8x8_t, int16x8_t, vqmovn, s16)
-
-#undef VMOVN_IMPL
-#undef VQMOVN_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_MOVN_H */
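movl.h and movn.h together cover the widen/narrow pattern common in fixed-point kernels; vqmovn adds saturation on the way back down. A sketch combining both removed headers (helper name add_sat is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/movl.h"
    #include "arm_compute/core/NEON/wrapper/intrinsics/movn.h"
    #include <arm_neon.h>
    #include <cstdint>

    uint8x8_t add_sat(uint8x8_t a, uint8x8_t b)
    {
        using namespace arm_compute::wrapper;
        const uint16x8_t wide = vaddq_u16(vmovl(a), vmovl(b)); // u8 -> u16, no overflow
        return vqmovn(wide);                                   // saturating u16 -> u8
    }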
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mul.h b/arm_compute/core/NEON/wrapper/intrinsics/mul.h
deleted file mode 100644
index bbf70abac9..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/mul.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_MUL_H
-#define ARM_COMPUTE_WRAPPER_MUL_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VMUL_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vmul(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VMUL_IMPL(uint8_t, uint8x8_t, vmul, u8)
-VMUL_IMPL(int8_t, int8x8_t, vmul, s8)
-VMUL_IMPL(uint16_t, uint16x4_t, vmul, u16)
-VMUL_IMPL(int16_t, int16x4_t, vmul, s16)
-VMUL_IMPL(uint32_t, uint32x2_t, vmul, u32)
-VMUL_IMPL(int32_t, int32x2_t, vmul, s32)
-VMUL_IMPL(float, float32x2_t, vmul, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMUL_IMPL(float16_t, float16x4_t, vmul, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VMUL_IMPL(uint8_t, uint8x16_t, vmulq, u8)
-VMUL_IMPL(int8_t, int8x16_t, vmulq, s8)
-VMUL_IMPL(uint16_t, uint16x8_t, vmulq, u16)
-VMUL_IMPL(int16_t, int16x8_t, vmulq, s16)
-VMUL_IMPL(uint32_t, uint32x4_t, vmulq, u32)
-VMUL_IMPL(int32_t, int32x4_t, vmulq, s32)
-VMUL_IMPL(float, float32x4_t, vmulq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VMUL_IMPL(float16_t, float16x8_t, vmulq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VMUL_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_MUL_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/neg.h b/arm_compute/core/NEON/wrapper/intrinsics/neg.h
deleted file mode 100644
index da2f285eca..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/neg.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_NEG_H
-#define ARM_COMPUTE_WRAPPER_NEG_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VNEG_IMPL(vtype, prefix, postfix) \
- inline vtype vneg(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VNEG_IMPL(int8x8_t, vneg, s8)
-VNEG_IMPL(int16x4_t, vneg, s16)
-VNEG_IMPL(int32x2_t, vneg, s32)
-VNEG_IMPL(float32x2_t, vneg, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VNEG_IMPL(float16x4_t, vneg, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VNEG_IMPL(int8x16_t, vnegq, s8)
-VNEG_IMPL(int16x8_t, vnegq, s16)
-VNEG_IMPL(int32x4_t, vnegq, s32)
-VNEG_IMPL(float32x4_t, vnegq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VNEG_IMPL(float16x8_t, vnegq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VNEG_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_NEG_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/not.h b/arm_compute/core/NEON/wrapper/intrinsics/not.h
deleted file mode 100644
index 5b1e4056ca..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/not.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_NOT_H
-#define ARM_COMPUTE_WRAPPER_NOT_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VNOT_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vnot(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VNOT_IMPL(uint8_t, uint8x8_t, vmvn, u8)
-VNOT_IMPL(int8_t, int8x8_t, vmvn, s8)
-VNOT_IMPL(uint16_t, uint16x4_t, vmvn, u16)
-VNOT_IMPL(int16_t, int16x4_t, vmvn, s16)
-VNOT_IMPL(uint32_t, uint32x2_t, vmvn, u32)
-VNOT_IMPL(int32_t, int32x2_t, vmvn, s32)
-VNOT_IMPL(float32x2_t, float32x2_t, vinv, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VNOT_IMPL(float16x4_t, float16x4_t, vinv, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VNOT_IMPL(uint8_t, uint8x16_t, vmvnq, u8)
-VNOT_IMPL(int8_t, int8x16_t, vmvnq, s8)
-VNOT_IMPL(uint16_t, uint16x8_t, vmvnq, u16)
-VNOT_IMPL(int16_t, int16x8_t, vmvnq, s16)
-VNOT_IMPL(uint32_t, uint32x4_t, vmvnq, u32)
-VNOT_IMPL(int32_t, int32x4_t, vmvnq, s32)
-VNOT_IMPL(float32x4_t, float32x4_t, vinvq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VNOT_IMPL(float16x8_t, float16x8_t, vinvq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VNOT_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_NOT_H */
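Two quirks worth flagging above: the float overloads of vnot map to the vinv_f32/vinvq_f32 reciprocal helpers rather than to a bitwise operation, and those helpers live in NEMath, which this header does not itself include (it pulls in only <arm_neon.h>). Integer use is the straightforward case (helper name invert_mask is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/not.h"
    #include <arm_neon.h>
    #include <cstdint>

    uint8x16_t invert_mask(uint8x16_t mask)
    {
        // Maps to vmvnq_u8; handy for flipping comparison-result masks.
        return arm_compute::wrapper::vnot(mask);
    }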
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/orr.h b/arm_compute/core/NEON/wrapper/intrinsics/orr.h
deleted file mode 100644
index 0fbdd44c76..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/orr.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_ORR_H
-#define ARM_COMPUTE_WRAPPER_ORR_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VORR_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vorr(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VORR_IMPL(uint8_t, uint8x8_t, vorr, u8)
-VORR_IMPL(int8_t, int8x8_t, vorr, s8)
-VORR_IMPL(uint16_t, uint16x4_t, vorr, u16)
-VORR_IMPL(int16_t, int16x4_t, vorr, s16)
-VORR_IMPL(uint32_t, uint32x2_t, vorr, u32)
-VORR_IMPL(int32_t, int32x2_t, vorr, s32)
-VORR_IMPL(uint64_t, uint64x1_t, vorr, u64)
-VORR_IMPL(int64_t, int64x1_t, vorr, s64)
-
-VORR_IMPL(uint8_t, uint8x16_t, vorrq, u8)
-VORR_IMPL(int8_t, int8x16_t, vorrq, s8)
-VORR_IMPL(uint16_t, uint16x8_t, vorrq, u16)
-VORR_IMPL(int16_t, int16x8_t, vorrq, s16)
-VORR_IMPL(uint32_t, uint32x4_t, vorrq, u32)
-VORR_IMPL(int32_t, int32x4_t, vorrq, s32)
-VORR_IMPL(uint64_t, uint64x2_t, vorrq, u64)
-VORR_IMPL(int64_t, int64x2_t, vorrq, s64)
-
-#undef VORR_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_ORR_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmax.h b/arm_compute/core/NEON/wrapper/intrinsics/pmax.h
deleted file mode 100644
index afad27f1e4..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/pmax.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_PMAX_H
-#define ARM_COMPUTE_WRAPPER_PMAX_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VPMAX_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vpmax(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VPMAX_IMPL(uint8_t, uint8x8_t, vpmax, u8)
-VPMAX_IMPL(int8_t, int8x8_t, vpmax, s8)
-VPMAX_IMPL(uint16_t, uint16x4_t, vpmax, u16)
-VPMAX_IMPL(int16_t, int16x4_t, vpmax, s16)
-VPMAX_IMPL(uint32_t, uint32x2_t, vpmax, u32)
-VPMAX_IMPL(int32_t, int32x2_t, vpmax, s32)
-VPMAX_IMPL(float, float32x2_t, vpmax, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VPMAX_IMPL(float16_t, float16x4_t, vpmax, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VPMAX_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_PMAX_H */
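Pairwise max is the building block for horizontal reductions; note that, unlike max.h, only 64-bit vector overloads are provided. A reduction sketch (helper name hmax is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/pmax.h"
    #include <arm_neon.h>

    float hmax(float32x4_t v)
    {
        using namespace arm_compute::wrapper;
        float32x2_t m = vpmax(vget_low_f32(v), vget_high_f32(v)); // {max(v0,v1), max(v2,v3)}
        m             = vpmax(m, m);                              // both lanes now hold the overall max
        return vget_lane_f32(m, 0);
    }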
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmin.h b/arm_compute/core/NEON/wrapper/intrinsics/pmin.h
deleted file mode 100644
index 77c5cf61ba..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/pmin.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_PMIN_H
-#define ARM_COMPUTE_WRAPPER_PMIN_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VPMIN_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vpmin(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VPMIN_IMPL(uint8_t, uint8x8_t, vpmin, u8)
-VPMIN_IMPL(int8_t, int8x8_t, vpmin, s8)
-VPMIN_IMPL(uint16_t, uint16x4_t, vpmin, u16)
-VPMIN_IMPL(int16_t, int16x4_t, vpmin, s16)
-VPMIN_IMPL(uint32_t, uint32x2_t, vpmin, u32)
-VPMIN_IMPL(int32_t, int32x2_t, vpmin, s32)
-VPMIN_IMPL(float, float32x2_t, vpmin, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VPMIN_IMPL(float16_t, float16x4_t, vpmin, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VPMIN_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_PMIN_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pow.h b/arm_compute/core/NEON/wrapper/intrinsics/pow.h
deleted file mode 100644
index 1b5d62df5e..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/pow.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_POW_H
-#define ARM_COMPUTE_WRAPPER_POW_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VPOW_IMPL(vtype, prefix, postfix) \
- inline vtype vpow(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VPOW_IMPL(float32x4_t, vpowq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VPOW_IMPL(float16x8_t, vpowq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VPOW_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_POW_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/qmovun.h b/arm_compute/core/NEON/wrapper/intrinsics/qmovun.h
deleted file mode 100644
index a0347020db..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/qmovun.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_QMOVUN_H
-#define ARM_COMPUTE_WRAPPER_QMOVUN_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VQMOVUN_IMPL(dtype, vtype, prefix, postfix) \
- inline dtype vqmovun(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VQMOVUN_IMPL(uint32x2_t, int64x2_t, vqmovun, s64)
-VQMOVUN_IMPL(uint16x4_t, int32x4_t, vqmovun, s32)
-VQMOVUN_IMPL(uint8x8_t, int16x8_t, vqmovun, s16)
-
-#undef VQMOVUN_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_QMOVUN_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h b/arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h
deleted file mode 100644
index 579da344a7..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_REINTERPRET_H
-#define ARM_COMPUTE_WRAPPER_REINTERPRET_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VREINTERPRET_IMPL(ptype, vtype, prefix, postfix1, postfix2) \
- inline ptype vreinterpret(const vtype &a) \
- { \
- return prefix##_##postfix1##_##postfix2(a); \
- } \
- \
- inline ptype vreinterpret(const ptype &a) \
- { \
- return a; \
- }
-
-VREINTERPRET_IMPL(int16x4_t, uint16x4_t, vreinterpret, s16, u16)
-
-VREINTERPRET_IMPL(int32x4_t, uint32x4_t, vreinterpretq, s32, u32)
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_REINTERPRET_H */
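Only two conversions are instantiated here (s16 from u16, s32 from u32), each paired with an identity overload so generic call sites accept either signedness. A sketch (helper name as_signed is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h"
    #include <arm_neon.h>
    #include <cstdint>

    int32x4_t as_signed(uint32x4_t v)
    {
        // vreinterpretq_s32_u32 underneath: a bit-pattern cast, no value conversion.
        return arm_compute::wrapper::vreinterpret(v);
    }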
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/rev64.h b/arm_compute/core/NEON/wrapper/intrinsics/rev64.h
deleted file mode 100644
index 0385704f3f..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/rev64.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_REV64_H
-#define ARM_COMPUTE_WRAPPER_REV64_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VREV64_IMPL(vtype, prefix, postfix) \
- inline vtype vrev64(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VREV64_IMPL(uint8x8_t, vrev64, u8)
-VREV64_IMPL(int8x8_t, vrev64, s8)
-VREV64_IMPL(uint16x4_t, vrev64, u16)
-VREV64_IMPL(int16x4_t, vrev64, s16)
-VREV64_IMPL(uint32x2_t, vrev64, u32)
-VREV64_IMPL(int32x2_t, vrev64, s32)
-VREV64_IMPL(float32x2_t, vrev64, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VREV64_IMPL(float16x4_t, vrev64, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VREV64_IMPL(uint8x16_t, vrev64q, u8)
-VREV64_IMPL(int8x16_t, vrev64q, s8)
-VREV64_IMPL(uint16x8_t, vrev64q, u16)
-VREV64_IMPL(int16x8_t, vrev64q, s16)
-VREV64_IMPL(uint32x4_t, vrev64q, u32)
-VREV64_IMPL(int32x4_t, vrev64q, s32)
-VREV64_IMPL(float32x4_t, vrev64q, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VREV64_IMPL(float16x8_t, vrev64q, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VREV64_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_REV64_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/round.h b/arm_compute/core/NEON/wrapper/intrinsics/round.h
deleted file mode 100644
index d6f5a88689..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/round.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_ROUND_H
-#define ARM_COMPUTE_WRAPPER_ROUND_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VROUNDQ_IMPL(vtype, postfix) \
- inline vtype vround(const vtype &a) \
- { \
- return vroundq_rte_##postfix(a); \
- }
-
-#define VROUNDQ_IMPL_INT(vtype, postfix) \
- inline vtype vround(const vtype &a) \
- { \
- ARM_COMPUTE_UNUSED(a); \
- ARM_COMPUTE_ERROR("Not supported"); \
- }
-
-VROUNDQ_IMPL(float32x4_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VROUNDQ_IMPL(float16x8_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VROUNDQ_IMPL_INT(int32x4_t, s32)
-#undef VROUNDQ_IMPL
-
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_ROUND_H */
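vround forwards to the round-to-nearest-even helpers (vroundq_rte_*) from NEMath rather than to a raw NEON intrinsic; the s32 overload exists only so generic code compiles, and errors at runtime. A sketch (helper name round_rte is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/round.h"
    #include <arm_neon.h>

    float32x4_t round_rte(float32x4_t v)
    {
        // Ties round to even, e.g. 0.5 -> 0.0 and 1.5 -> 2.0 per lane.
        return arm_compute::wrapper::vround(v);
    }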
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/setlane.h b/arm_compute/core/NEON/wrapper/intrinsics/setlane.h
deleted file mode 100644
index 6332f3025e..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/setlane.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_SET_LANE_H
-#define ARM_COMPUTE_WRAPPER_SET_LANE_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VSETLANE_IMPL_8(stype, atype, vtype, postfix) \
- inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vset_lane_##postfix(value, vector, 0); \
- case 1: \
- return vset_lane_##postfix(value, vector, 1); \
- case 2: \
- return vset_lane_##postfix(value, vector, 2); \
- case 3: \
- return vset_lane_##postfix(value, vector, 3); \
- case 4: \
- return vset_lane_##postfix(value, vector, 4); \
- case 5: \
- return vset_lane_##postfix(value, vector, 5); \
- case 6: \
- return vset_lane_##postfix(value, vector, 6); \
- case 7: \
- return vset_lane_##postfix(value, vector, 7); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VSETLANE_IMPL_4(stype, atype, vtype, postfix) \
- inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vset_lane_##postfix(value, vector, 0); \
- case 1: \
- return vset_lane_##postfix(value, vector, 1); \
- case 2: \
- return vset_lane_##postfix(value, vector, 2); \
- case 3: \
- return vset_lane_##postfix(value, vector, 3); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VSETLANE_IMPL_2(stype, atype, vtype, postfix) \
- inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vset_lane_##postfix(value, vector, 0); \
- case 1: \
- return vset_lane_##postfix(value, vector, 1); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-VSETLANE_IMPL_8(uint8x8_t, uint8_t, uint8x8_t, u8)
-VSETLANE_IMPL_8(int8x8_t, int8_t, int8x8_t, s8)
-VSETLANE_IMPL_4(uint16x4_t, uint16_t, uint16x4_t, u16)
-VSETLANE_IMPL_4(int16x4_t, int16_t, int16x4_t, s16)
-VSETLANE_IMPL_2(uint32x2_t, uint32_t, uint32x2_t, u32)
-VSETLANE_IMPL_2(int32x2_t, int32_t, int32x2_t, s32)
-VSETLANE_IMPL_2(float32x2_t, float, float32x2_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VSETLANE_IMPL_4(float16x4_t, float16_t, float16x4_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#define VSETQLANE_IMPL_16(stype, atype, vtype, postfix) \
- inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vsetq_lane_##postfix(value, vector, 0); \
- case 1: \
- return vsetq_lane_##postfix(value, vector, 1); \
- case 2: \
- return vsetq_lane_##postfix(value, vector, 2); \
- case 3: \
- return vsetq_lane_##postfix(value, vector, 3); \
- case 4: \
- return vsetq_lane_##postfix(value, vector, 4); \
- case 5: \
- return vsetq_lane_##postfix(value, vector, 5); \
- case 6: \
- return vsetq_lane_##postfix(value, vector, 6); \
- case 7: \
- return vsetq_lane_##postfix(value, vector, 7); \
- case 8: \
- return vsetq_lane_##postfix(value, vector, 8); \
- case 9: \
- return vsetq_lane_##postfix(value, vector, 9); \
- case 10: \
- return vsetq_lane_##postfix(value, vector, 10); \
- case 11: \
- return vsetq_lane_##postfix(value, vector, 11); \
- case 12: \
- return vsetq_lane_##postfix(value, vector, 12); \
- case 13: \
- return vsetq_lane_##postfix(value, vector, 13); \
- case 14: \
- return vsetq_lane_##postfix(value, vector, 14); \
- case 15: \
- return vsetq_lane_##postfix(value, vector, 15); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VSETQLANE_IMPL_8(stype, atype, vtype, postfix) \
- inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vsetq_lane_##postfix(value, vector, 0); \
- case 1: \
- return vsetq_lane_##postfix(value, vector, 1); \
- case 2: \
- return vsetq_lane_##postfix(value, vector, 2); \
- case 3: \
- return vsetq_lane_##postfix(value, vector, 3); \
- case 4: \
- return vsetq_lane_##postfix(value, vector, 4); \
- case 5: \
- return vsetq_lane_##postfix(value, vector, 5); \
- case 6: \
- return vsetq_lane_##postfix(value, vector, 6); \
- case 7: \
- return vsetq_lane_##postfix(value, vector, 7); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-#define VSETQLANE_IMPL_4(stype, atype, vtype, postfix) \
- inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \
- { \
- switch(lane) \
- { \
- case 0: \
- return vsetq_lane_##postfix(value, vector, 0); \
- case 1: \
- return vsetq_lane_##postfix(value, vector, 1); \
- case 2: \
- return vsetq_lane_##postfix(value, vector, 2); \
- case 3: \
- return vsetq_lane_##postfix(value, vector, 3); \
- default: \
- ARM_COMPUTE_ERROR("Invalid lane"); \
- } \
- }
-
-VSETQLANE_IMPL_16(uint8x16_t, uint8_t, uint8x16_t, u8)
-VSETQLANE_IMPL_16(int8x16_t, int8_t, int8x16_t, s8)
-VSETQLANE_IMPL_8(uint16x8_t, uint16_t, uint16x8_t, u16)
-VSETQLANE_IMPL_8(int16x8_t, int16_t, int16x8_t, s16)
-VSETQLANE_IMPL_4(uint32x4_t, uint32_t, uint32x4_t, u32)
-VSETQLANE_IMPL_4(int32x4_t, int32_t, int32x4_t, s32)
-VSETQLANE_IMPL_4(float32x4_t, float, float32x4_t, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VSETQLANE_IMPL_8(float16x8_t, float16_t, float16x8_t, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VSETLANE_IMPL_8
-#undef VSETLANE_IMPL_4
-#undef VSETLANE_IMPL_2
-
-#undef VSETQLANE_IMPL_16
-#undef VSETQLANE_IMPL_8
-#undef VSETQLANE_IMPL_4
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_SET_LANE_H */
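The switch bodies above exist because vset_lane/vsetq_lane require a compile-time constant lane index; vsetlane trades a branch for a runtime index. A sketch (helper name poke is ours):

    #include "arm_compute/core/NEON/wrapper/intrinsics/setlane.h"
    #include <arm_neon.h>
    #include <cstdint>

    uint8x16_t poke(uint8x16_t v, unsigned int lane, uint8_t value)
    {
        // Dispatches to vsetq_lane_u8 with the matching constant index;
        // an out-of-range lane hits ARM_COMPUTE_ERROR("Invalid lane").
        return arm_compute::wrapper::vsetlane(value, v, lane);
    }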
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/sin.h b/arm_compute/core/NEON/wrapper/intrinsics/sin.h
deleted file mode 100644
index bca72db38a..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/sin.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2019-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_SIN_H
-#define ARM_COMPUTE_WRAPPER_SIN_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VSIN_IMPL(vtype, prefix, postfix) \
- inline vtype vsin(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-#define VSIN_IMPL_INT(vtype, prefix, postfix) \
- inline vtype vsin(const vtype &a) \
- { \
- ARM_COMPUTE_UNUSED(a); \
- ARM_COMPUTE_ERROR("Not supported"); \
- }
-
-VSIN_IMPL(float32x4_t, vsinq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VSIN_IMPL(float16x8_t, vsinq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VSIN_IMPL_INT(int32x4_t, vsinq, s32)
-
-#undef VSIN_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_SIN_H */
\ No newline at end of file
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/store.h b/arm_compute/core/NEON/wrapper/intrinsics/store.h
deleted file mode 100644
index eb2ae6a5e1..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/store.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_STORE_H
-#define ARM_COMPUTE_WRAPPER_STORE_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VSTORE_IMPL(stype, vtype, prefix, postfix) \
- inline void vstore(stype *ptr, vtype val) \
- { \
- prefix##_##postfix(ptr, val); \
- }
-
-VSTORE_IMPL(uint8_t, uint8x8_t, vst1, u8)
-VSTORE_IMPL(uint8_t, uint8x8x2_t, vst2, u8)
-VSTORE_IMPL(int8_t, int8x8_t, vst1, s8)
-VSTORE_IMPL(int8_t, int8x8x2_t, vst2, s8)
-VSTORE_IMPL(uint16_t, uint16x4_t, vst1, u16)
-VSTORE_IMPL(int16_t, int16x4_t, vst1, s16)
-VSTORE_IMPL(uint32_t, uint32x2_t, vst1, u32)
-VSTORE_IMPL(int32_t, int32x2_t, vst1, s32)
-//VSTORE_IMPL(uint64_t, 1, vst1, u64)
-//VSTORE_IMPL(int64_t, 1, vst1, s64)
-VSTORE_IMPL(float, float32x2_t, vst1, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VSTORE_IMPL(float16_t, float16x4_t, vst1, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VSTORE_IMPL(uint8_t, uint8x16_t, vst1q, u8)
-VSTORE_IMPL(int8_t, int8x16_t, vst1q, s8)
-VSTORE_IMPL(uint16_t, uint16x8_t, vst1q, u16)
-VSTORE_IMPL(int16_t, int16x8_t, vst1q, s16)
-VSTORE_IMPL(uint32_t, uint32x4_t, vst1q, u32)
-VSTORE_IMPL(int32_t, int32x4_t, vst1q, s32)
-//VSTORE_IMPL(uint64_t, 2, vst1q, u64)
-//VSTORE_IMPL(int64_t, 2, vst1q, s64)
-VSTORE_IMPL(float, float32x4_t, vst1q, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VSTORE_IMPL(float16_t, float16x8_t, vst1q, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VSTORE_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_STORE_H */
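Reviewer note: the point of these overloads is that templated kernels can write any vector type back to memory without spelling out the width-specific intrinsic (vst1_u8, vst1q_f32, and so on); overload resolution on the vector argument picks the right store. A usage sketch, assuming an ARM NEON target and the header above as it stood before this removal:

#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
#include <arm_neon.h>

// Copies 16 bytes; wrapper::vstore(dst, v) resolves to vst1q_u8 here.
void copy16(const uint8_t *src, uint8_t *dst)
{
    const uint8x16_t v = vld1q_u8(src);
    arm_compute::wrapper::vstore(dst, v);
}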
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/sub.h b/arm_compute/core/NEON/wrapper/intrinsics/sub.h
deleted file mode 100644
index f46b57c815..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/sub.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_SUB_H
-#define ARM_COMPUTE_WRAPPER_SUB_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VSUB_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vsub(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VSUB_IMPL(uint8x8_t, uint8x8_t, vsub, u8)
-VSUB_IMPL(int8x8_t, int8x8_t, vsub, s8)
-VSUB_IMPL(uint16x4_t, uint16x4_t, vsub, u16)
-VSUB_IMPL(int16x4_t, int16x4_t, vsub, s16)
-VSUB_IMPL(uint32x2_t, uint32x2_t, vsub, u32)
-VSUB_IMPL(int32x2_t, int32x2_t, vsub, s32)
-VSUB_IMPL(uint64x1_t, uint64x1_t, vsub, u64)
-VSUB_IMPL(int64x1_t, int64x1_t, vsub, s64)
-VSUB_IMPL(float32x2_t, float32x2_t, vsub, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VSUB_IMPL(float16x4_t, float16x4_t, vsub, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VSUB_IMPL(uint8x16_t, uint8x16_t, vsubq, u8)
-VSUB_IMPL(int8x16_t, int8x16_t, vsubq, s8)
-VSUB_IMPL(uint16x8_t, uint16x8_t, vsubq, u16)
-VSUB_IMPL(int16x8_t, int16x8_t, vsubq, s16)
-VSUB_IMPL(uint32x4_t, uint32x4_t, vsubq, u32)
-VSUB_IMPL(int32x4_t, int32x4_t, vsubq, s32)
-VSUB_IMPL(uint64x2_t, uint64x2_t, vsubq, u64)
-VSUB_IMPL(int64x2_t, int64x2_t, vsubq, s64)
-VSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-#undef VSUB_IMPL
-
-// VQSUB: Vector saturating sub (No notion of saturation for floating point)
-#define VQSUB_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vqsub(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VQSUB_IMPL(uint8x8_t, uint8x8_t, vqsub, u8)
-VQSUB_IMPL(int8x8_t, int8x8_t, vqsub, s8)
-VQSUB_IMPL(uint16x4_t, uint16x4_t, vqsub, u16)
-VQSUB_IMPL(int16x4_t, int16x4_t, vqsub, s16)
-VQSUB_IMPL(uint32x2_t, uint32x2_t, vqsub, u32)
-VQSUB_IMPL(int32x2_t, int32x2_t, vqsub, s32)
-VQSUB_IMPL(uint64x1_t, uint64x1_t, vqsub, u64)
-VQSUB_IMPL(int64x1_t, int64x1_t, vqsub, s64)
-VQSUB_IMPL(float32x2_t, float32x2_t, vsub, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VQSUB_IMPL(float16x4_t, float16x4_t, vsub, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-VQSUB_IMPL(uint8x16_t, uint8x16_t, vqsubq, u8)
-VQSUB_IMPL(int8x16_t, int8x16_t, vqsubq, s8)
-VQSUB_IMPL(uint16x8_t, uint16x8_t, vqsubq, u16)
-VQSUB_IMPL(int16x8_t, int16x8_t, vqsubq, s16)
-VQSUB_IMPL(uint32x4_t, uint32x4_t, vqsubq, u32)
-VQSUB_IMPL(int32x4_t, int32x4_t, vqsubq, s32)
-VQSUB_IMPL(uint64x2_t, uint64x2_t, vqsubq, u64)
-VQSUB_IMPL(int64x2_t, int64x2_t, vqsubq, s64)
-VQSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VQSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#undef VQSUB_IMPL
-
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_SUB_H */
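Reviewer note: the difference between vsub and vqsub above is wrap-around versus saturation; the float overloads of vqsub deliberately map to plain vsub, since IEEE floating point has no saturating arithmetic. A small behavioural sketch, assuming an ARM NEON target:

#include <arm_neon.h>
#include <cstdint>

// 3 - 5 wraps to 254 with vsub_u8 but clamps to 0 with vqsub_u8.
uint8_t wrapping_diff(uint8_t a, uint8_t b)
{
    return vget_lane_u8(vsub_u8(vdup_n_u8(a), vdup_n_u8(b)), 0);
}

uint8_t saturating_diff(uint8_t a, uint8_t b)
{
    return vget_lane_u8(vqsub_u8(vdup_n_u8(a), vdup_n_u8(b)), 0);
}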
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/tanh.h b/arm_compute/core/NEON/wrapper/intrinsics/tanh.h
deleted file mode 100644
index 648a001ca7..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/tanh.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_TANH_H
-#define ARM_COMPUTE_WRAPPER_TANH_H
-
-#include "arm_compute/core/NEON/NEMath.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VTANH_IMPL(vtype, prefix, postfix) \
- inline vtype vtanh(const vtype &a) \
- { \
- return prefix##_##postfix(a); \
- }
-
-VTANH_IMPL(float32x4_t, vtanhq, f32)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VTANH_IMPL(float16x8_t, vtanhq, f16)
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#undef VTANH_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_TANH_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/tbl.h b/arm_compute/core/NEON/wrapper/intrinsics/tbl.h
deleted file mode 100644
index d3d6b72e6a..0000000000
--- a/arm_compute/core/NEON/wrapper/intrinsics/tbl.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_TBL_H
-#define ARM_COMPUTE_WRAPPER_TBL_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-#define VTBL_IMPL(stype, vtype, prefix, postfix) \
- inline vtype vtbl(const stype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
- }
-
-VTBL_IMPL(uint8x8x2_t, uint8x8_t, vtbl2, u8)
-VTBL_IMPL(int8x8x2_t, int8x8_t, vtbl2, s8)
-
-#undef VTBL_IMPL
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_TBL_H */
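Reviewer note: vtbl2 is a byte-wise table lookup; each lane of the index vector selects one byte from the 16-byte two-register table, and out-of-range indices yield 0. A sketch of what the u8 overload expanded to, assuming an ARM NEON target:

#include <arm_neon.h>

// Reverses the first eight table bytes; wrapper::vtbl(table, idx)
// expanded to exactly this vtbl2_u8 call.
uint8x8_t reverse_low_half(uint8x8x2_t table)
{
    const uint8x8_t idx = {7, 6, 5, 4, 3, 2, 1, 0};
    return vtbl2_u8(table, idx);
}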
diff --git a/arm_compute/core/NEON/wrapper/scalar/add.h b/arm_compute/core/NEON/wrapper/scalar/add.h
deleted file mode 100644
index 5a04fe20fa..0000000000
--- a/arm_compute/core/NEON/wrapper/scalar/add.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_SCALAR_ADD_H
-#define ARM_COMPUTE_WRAPPER_SCALAR_ADD_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-inline uint8_t add_sat(const uint8_t &a, const uint8_t &b)
-{
- const uint8x8_t va = { a, 0, 0, 0, 0, 0, 0, 0 };
- const uint8x8_t vb = { b, 0, 0, 0, 0, 0, 0, 0 };
- return vget_lane_u8(vqadd_u8(va, vb), 0);
-}
-
-inline int16_t add_sat(const int16_t &a, const int16_t &b)
-{
- const int16x4_t va = { a, 0, 0, 0 };
- const int16x4_t vb = { b, 0, 0, 0 };
- return vget_lane_s16(vqadd_s16(va, vb), 0);
-}
-
-inline float add_sat(const float &a, const float &b)
-{
- // No notion of saturation exists in floating point
- return a + b;
-}
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-inline float16_t add_sat(const float16_t &a, const float16_t &b)
-{
- // No notion of saturation exists in floating point
- return a + b;
-}
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_SCALAR_ADD_H */
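Reviewer note: there is no scalar saturating-add intrinsic, so add_sat above widens each operand into lane 0 of a vector, applies vqadd, and extracts the lane. A portable plain-C++ equivalent of the u8 case, shown only as a reference sketch and not part of the ACL API:

#include <algorithm>
#include <cstdint>

// Same result as the lane-0 trick above: widen, add, clamp to [0, 255].
inline uint8_t add_sat_portable(uint8_t a, uint8_t b)
{
    return static_cast<uint8_t>(std::min(int{a} + int{b}, 255));
}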
diff --git a/arm_compute/core/NEON/wrapper/scalar/scalar.h b/arm_compute/core/NEON/wrapper/scalar/scalar.h
deleted file mode 100644
index ff2d807c0e..0000000000
--- a/arm_compute/core/NEON/wrapper/scalar/scalar.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_SCALAR_H
-#define ARM_COMPUTE_WRAPPER_SCALAR_H
-
-#include "arm_compute/core/NEON/wrapper/scalar/add.h"
-#include "arm_compute/core/NEON/wrapper/scalar/sub.h"
-
-#endif /* ARM_COMPUTE_WRAPPER_SCALAR_H */
diff --git a/arm_compute/core/NEON/wrapper/scalar/sub.h b/arm_compute/core/NEON/wrapper/scalar/sub.h
deleted file mode 100644
index 5b4cab93d3..0000000000
--- a/arm_compute/core/NEON/wrapper/scalar/sub.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_SCALAR_SUB_H
-#define ARM_COMPUTE_WRAPPER_SCALAR_SUB_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-inline uint8_t sub_sat(const uint8_t &a, const uint8_t &b)
-{
- const uint8x8_t va = { a, 0, 0, 0, 0, 0, 0, 0 };
- const uint8x8_t vb = { b, 0, 0, 0, 0, 0, 0, 0 };
- return vget_lane_u8(vqsub_u8(va, vb), 0);
-}
-
-inline int16_t sub_sat(const int16_t &a, const int16_t &b)
-{
- const int16x4_t va = { a, 0, 0, 0 };
- const int16x4_t vb = { b, 0, 0, 0 };
- return vget_lane_s16(vqsub_s16(va, vb), 0);
-}
-
-inline float sub_sat(const float &a, const float &b)
-{
- // No notion of saturation exists in floating point
- return a - b;
-}
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-inline float16_t sub_sat(const float16_t &a, const float16_t &b)
-{
- // No notion of saturation exists in floating point
- return a - b;
-}
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_SCALAR_SUB_H */
diff --git a/arm_compute/core/NEON/wrapper/traits.h b/arm_compute/core/NEON/wrapper/traits.h
deleted file mode 100644
index ae77d2778c..0000000000
--- a/arm_compute/core/NEON/wrapper/traits.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_TRAITS_H
-#define ARM_COMPUTE_WRAPPER_TRAITS_H
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace wrapper
-{
-namespace traits
-{
-// *INDENT-OFF*
-// clang-format off
-
-/** 64-bit vector tag */
-struct vector_64_tag {};
-/** 128-bit vector tag */
-struct vector_128_tag {};
-
-/** Create the appropriate NEON vector given its type and size in terms of elements */
-template <typename T, int S> struct neon_vector;
-
-// Specializations
-#ifndef DOXYGEN_SKIP_THIS
-template <> struct neon_vector<uint8_t, 8>{ using scalar_type = uint8_t; using type = uint8x8_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<int8_t, 8>{ using scalar_type = int8_t; using type = int8x8_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<uint8_t, 16>{ using scalar_type = uint8_t; using type = uint8x16_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<int8_t, 16>{ using scalar_type = int8_t; using type = int8x16_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<uint16_t, 4>{ using scalar_type = uint16_t; using type = uint16x4_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<int16_t, 4>{ using scalar_type = int16_t; using type = int16x4_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<uint16_t, 8>{ using scalar_type = uint16_t; using type = uint16x8_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<uint16_t, 16>{ using scalar_type = uint16_t; using type = uint16x8x2_t; };
-template <> struct neon_vector<int16_t, 8>{ using scalar_type = int16_t; using type = int16x8_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<int16_t, 16>{ using scalar_type = int16_t; using type = int16x8x2_t; };
-template <> struct neon_vector<uint32_t, 2>{ using scalar_type = uint32_t; using type = uint32x2_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<int32_t, 2>{ using scalar_type = int32_t; using type = int32x2_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<uint32_t, 4>{ using scalar_type = uint32_t; using type = uint32x4_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<int32_t, 4>{ using scalar_type = int32_t; using type = int32x4_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<uint64_t, 1>{ using scalar_type = uint64_t; using type = uint64x1_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<int64_t, 1>{ using scalar_type = int64_t; using type = int64x1_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<uint64_t, 2>{ using scalar_type = uint64_t; using type = uint64x2_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<int64_t, 2>{ using scalar_type = int64_t; using type = int64x2_t; using tag_type = vector_128_tag; };
-template <> struct neon_vector<float_t, 2>{ using scalar_type = float_t; using type = float32x2_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<float_t, 4>{ using scalar_type = float_t; using type = float32x4_t; using tag_type = vector_128_tag; };
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-template <> struct neon_vector<float16_t, 4>{ using scalar_type = float16_t; using type = float16x4_t; using tag_type = vector_64_tag; };
-template <> struct neon_vector<float16_t, 8>{ using scalar_type = float16_t; using type = float16x8_t; using tag_type = vector_128_tag; };
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#endif /* DOXYGEN_SKIP_THIS */
-
-/** Helper type template to get the type of a neon vector */
-template <typename T, int S> using neon_vector_t = typename neon_vector<T, S>::type;
-/** Helper type template to get the tag type of a neon vector */
-template <typename T, int S> using neon_vector_tag_t = typename neon_vector<T, S>::tag_type;
-
-/** Vector bit-width enum class */
-enum class BitWidth
-{
- W64, /**< 64-bit width */
- W128, /**< 128-bit width */
-};
-
-/** Create the appropriate NEON vector given its type and size in terms of bits */
-template <typename T, BitWidth BW> struct neon_bitvector;
-// Specializations
-#ifndef DOXYGEN_SKIP_THIS
-template <> struct neon_bitvector<uint8_t, BitWidth::W64>{ using type = uint8x8_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<int8_t, BitWidth::W64>{ using type = int8x8_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<uint8_t, BitWidth::W128>{ using type = uint8x16_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<int8_t, BitWidth::W128>{ using type = int8x16_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<uint16_t, BitWidth::W64>{ using type = uint16x4_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<int16_t, BitWidth::W64>{ using type = int16x4_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<uint16_t, BitWidth::W128>{ using type = uint16x8_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<int16_t, BitWidth::W128>{ using type = int16x8_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<uint32_t, BitWidth::W64>{ using type = uint32x2_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<int32_t, BitWidth::W64>{ using type = int32x2_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<uint32_t, BitWidth::W128>{ using type = uint32x4_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<int32_t, BitWidth::W128>{ using type = int32x4_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<uint64_t, BitWidth::W64>{ using type = uint64x1_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<int64_t, BitWidth::W64>{ using type = int64x1_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<uint64_t, BitWidth::W128>{ using type = uint64x2_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<int64_t, BitWidth::W128>{ using type = int64x2_t; using tag_type = vector_128_tag; };
-template <> struct neon_bitvector<float_t, BitWidth::W64>{ using type = float32x2_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<float_t, BitWidth::W128>{ using type = float32x4_t; using tag_type = vector_128_tag; };
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-template <> struct neon_bitvector<float16_t, BitWidth::W64>{ using type = float16x4_t; using tag_type = vector_64_tag; };
-template <> struct neon_bitvector<float16_t, BitWidth::W128>{ using type = float16x8_t; using tag_type = vector_128_tag; };
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#endif /* DOXYGEN_SKIP_THIS */
-
-/** Helper type template to get the type of a neon vector */
-template <typename T, BitWidth BW> using neon_bitvector_t = typename neon_bitvector<T, BW>::type;
-/** Helper type template to get the tag type of a neon vector */
-template <typename T, BitWidth BW> using neon_bitvector_tag_t = typename neon_bitvector<T, BW>::tag_type;
-
-/** Promote a type */
-template <typename T> struct promote { };
-template <> struct promote<uint8_t> { using type = uint16_t; };
-template <> struct promote<int8_t> { using type = int16_t; };
-template <> struct promote<uint16_t> { using type = uint32_t; };
-template <> struct promote<int16_t> { using type = int32_t; };
-template <> struct promote<uint32_t> { using type = uint64_t; };
-template <> struct promote<int32_t> { using type = int64_t; };
-template <> struct promote<float> { using type = float; };
-template <> struct promote<half> { using type = half; };
-
-/** Get promoted type */
-template <typename T>
-using promote_t = typename promote<T>::type;
-
-// clang-format on
-// *INDENT-ON*
-} // namespace traits
-} // namespace wrapper
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_TRAITS_H */
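Reviewer note: this traits table is what lets a single kernel template cover every vector shape. neon_vector_t<T, S> maps a (scalar type, lane count) pair to the intrinsic type, and tag_type drives tag dispatch between 64-bit and 128-bit code paths. A consumption sketch, assuming an ARM NEON target and the header above as it stood before this removal:

#include "arm_compute/core/NEON/wrapper/traits.h"
#include <arm_neon.h>

// zero_of<uint8_t, 16>() yields a zeroed uint8x16_t, zero_of<float, 2>()
// a zeroed float32x2_t; one template instead of a function per type.
template <typename T, int S>
arm_compute::wrapper::traits::neon_vector_t<T, S> zero_of()
{
    return {};
}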
diff --git a/arm_compute/core/NEON/wrapper/wrapper.h b/arm_compute/core/NEON/wrapper/wrapper.h
deleted file mode 100644
index 99a5909e8b..0000000000
--- a/arm_compute/core/NEON/wrapper/wrapper.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_WRAPPER_H
-#define ARM_COMPUTE_WRAPPER_H
-
-// Traits
-#include "arm_compute/core/NEON/wrapper/traits.h"
-
-// Intrinsics Overloads
-#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"
-#include "arm_compute/core/NEON/wrapper/scalar/scalar.h"
-
-#endif /* ARM_COMPUTE_WRAPPER_H */
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index 337ccbc3f7..0b4df4f2e2 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_PIXELVALUE_H
#define ARM_COMPUTE_PIXELVALUE_H
+#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Types.h"
#include <cstdint>
@@ -35,8 +36,7 @@ class PixelValue
{
public:
/** Default constructor: value initialized to 0 */
- PixelValue()
- : value{ int64_t(0) }
+ PixelValue() noexcept : value{int64_t(0)}
{
}
/** Initialize the union with a pixel value of chosen datatype
@@ -45,10 +45,9 @@ public:
* @param[in] datatype DataType that @p v has to be stored as
* @param[in] qinfo (Optional) QuantizationInfo to apply in case of quantized data types to @p v
*/
- PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
- : PixelValue()
+ PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) : PixelValue()
{
- switch(datatype)
+ switch (datatype)
{
case DataType::U8:
value.u8 = static_cast<uint8_t>(v);
@@ -108,8 +107,7 @@ public:
*
* @param[in] v S8 value.
*/
- PixelValue(int8_t v)
- : PixelValue()
+ PixelValue(int8_t v) : PixelValue()
{
value.s8 = v;
}
@@ -117,8 +115,7 @@ public:
*
* @param[in] v U8 value.
*/
- PixelValue(uint8_t v)
- : PixelValue()
+ PixelValue(uint8_t v) : PixelValue()
{
value.u8 = v;
}
@@ -126,8 +123,7 @@ public:
*
* @param[in] v U16 value.
*/
- PixelValue(uint16_t v)
- : PixelValue()
+ PixelValue(uint16_t v) : PixelValue()
{
value.u16 = v;
}
@@ -135,8 +131,7 @@ public:
*
* @param[in] v S16 value.
*/
- PixelValue(int16_t v)
- : PixelValue()
+ PixelValue(int16_t v) : PixelValue()
{
value.s16 = v;
}
@@ -144,8 +139,7 @@ public:
*
* @param[in] v U32 value.
*/
- PixelValue(uint32_t v)
- : PixelValue()
+ PixelValue(uint32_t v) : PixelValue()
{
value.u32 = v;
}
@@ -153,8 +147,7 @@ public:
*
* @param[in] v S32 value.
*/
- PixelValue(int32_t v)
- : PixelValue()
+ PixelValue(int32_t v) : PixelValue()
{
value.s32 = v;
}
@@ -163,8 +156,7 @@ public:
*
* @param[in] v U64 value.
*/
- PixelValue(uint64_t v)
- : PixelValue()
+ PixelValue(uint64_t v) : PixelValue()
{
value.u64 = v;
}
@@ -172,8 +164,7 @@ public:
*
* @param[in] v S64 value.
*/
- PixelValue(int64_t v)
- : PixelValue()
+ PixelValue(int64_t v) : PixelValue()
{
value.s64 = v;
}
@@ -181,8 +172,7 @@ public:
*
* @param[in] v BF16 value.
*/
- PixelValue(bfloat16 v)
- : PixelValue()
+ PixelValue(bfloat16 v) : PixelValue()
{
value.bf16 = v;
}
@@ -190,8 +180,7 @@ public:
*
* @param[in] v F16 value.
*/
- PixelValue(half v)
- : PixelValue()
+ PixelValue(half v) : PixelValue()
{
value.f16 = v;
}
@@ -199,8 +188,7 @@ public:
*
* @param[in] v F32 value.
*/
- PixelValue(float v)
- : PixelValue()
+ PixelValue(float v) : PixelValue()
{
value.f32 = v;
}
@@ -208,8 +196,7 @@ public:
*
* @param[in] v F64 value.
*/
- PixelValue(double v)
- : PixelValue()
+ PixelValue(double v) : PixelValue()
{
value.f64 = v;
}
@@ -217,23 +204,23 @@ public:
* Use the field corresponding to the image format
*/
union
- {
- uint64_t u64; /**< Single channel U64 */
- int64_t s64; /**< Single channel S64 */
- uint8_t rgb[3]; /**< 3 channels: RGB888 */
- uint8_t yuv[3]; /**< 3 channels: Any YUV format */
- uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */
- double f64; /**< Single channel double */
- float f32; /**< Single channel float 32 */
- half f16; /**< Single channel F16 */
- bfloat16 bf16; /**< Single channel brain floating-point number */
- uint8_t u8; /**< Single channel U8 */
- int8_t s8; /**< Single channel S8 */
- uint16_t u16; /**< Single channel U16 */
- int16_t s16; /**< Single channel S16 */
- uint32_t u32; /**< Single channel U32 */
- int32_t s32; /**< Single channel S32 */
- } value;
+ {
+ uint64_t u64; /**< Single channel U64 */
+ int64_t s64; /**< Single channel S64 */
+ uint8_t rgb[3]; /**< 3 channels: RGB888 */
+ uint8_t yuv[3]; /**< 3 channels: Any YUV format */
+ uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */
+ double f64; /**< Single channel double */
+ float f32; /**< Single channel float 32 */
+ half f16; /**< Single channel F16 */
+ bfloat16 bf16; /**< Single channel brain floating-point number */
+ uint8_t u8; /**< Single channel U8 */
+ int8_t s8; /**< Single channel S8 */
+ uint16_t u16; /**< Single channel U16 */
+ int16_t s16; /**< Single channel S16 */
+ uint32_t u32; /**< Single channel U32 */
+ int32_t s32; /**< Single channel S32 */
+ } value;
/** Interpret the pixel value as a U8
*
* @param[out] v Returned value
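Reviewer note: PixelValue stores one scalar in whichever union field matches the tensor's data type, and the (value, DataType, QuantizationInfo) constructor quantizes on the fly. A usage sketch; the QASYMM8 branch is assumed to behave like the U8 case shown above, quantizing with the supplied QuantizationInfo:

#include "arm_compute/core/PixelValue.h"

using namespace arm_compute;

// Border constant 0.5f for a QASYMM8 tensor with scale 0.05, offset 10:
// round(0.5 / 0.05) + 10 == 20 ends up in border.value.u8.
const PixelValue border(0.5, DataType::QASYMM8, QuantizationInfo(0.05f, 10));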
diff --git a/arm_compute/core/PyramidInfo.h b/arm_compute/core/PyramidInfo.h
deleted file mode 100644
index e8cbe3488a..0000000000
--- a/arm_compute/core/PyramidInfo.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_PYRAMIDINFO_H
-#define ARM_COMPUTE_PYRAMIDINFO_H
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-
-namespace arm_compute
-{
-/** Store the Pyramid's metadata */
-class PyramidInfo
-{
-public:
- /** Default constructor */
- PyramidInfo();
- /** Default destructor */
- virtual ~PyramidInfo() = default;
- /** Allow instances of this class to be copy constructed */
- PyramidInfo(const PyramidInfo &) = default;
- /** Allow instances of this class to be copied */
- PyramidInfo &operator=(const PyramidInfo &) = default;
- /** Allow instances of this class to be move constructed */
- PyramidInfo(PyramidInfo &&) = default;
- /** Allow instances of this class to be moved */
- PyramidInfo &operator=(PyramidInfo &&) = default;
-
- /** Create pyramid info for 2D tensors
- *
- * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value
- * @param[in] scale Used to indicate the scale between the pyramid levels.
- * This is required to be a non-zero positive value.
- * @param[in] width The width of the 2D tensor at 0th pyramid level
- * @param[in] height The height of the 2D tensor at 0th pyramid level
- * @param[in] format The format of all 2D tensors in the pyramid
- * NV12, NV21, IYUV, UYVY and YUYV formats are not supported.
- */
- PyramidInfo(size_t num_levels, float scale, size_t width, size_t height, Format format);
-
- /** Create pyramid info using TensorShape
- *
- * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value
- * @param[in] scale Used to indicate the scale between the pyramid levels.
- * This is required to be a non-zero positive value.
- * @param[in] tensor_shape It specifies the size for each dimension of the tensor at the 0th pyramid level, in number of elements
- * @param[in] format The format of all tensors in the pyramid
- */
- PyramidInfo(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format);
-
- /** Initialize pyramid's metadata for 2D tensors
- *
- * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value
- * @param[in] scale Used to indicate the scale between the pyramid levels.
- * This is required to be a non-zero positive value.
- * @param[in] width The width of the 2D tensor at 0th pyramid level
- * @param[in] height The height of the 2D tensor at 0th pyramid level
- * @param[in] format The format of all 2D tensors in the pyramid
- * NV12, NV21, IYUV, UYVY and YUYV formats are not supported.
- */
- void init(size_t num_levels, float scale, size_t width, size_t height, Format format);
- /** Initialize pyramid's metadata using TensorShape
- *
- * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value
- * @param[in] scale Used to indicate the scale between the pyramid levels.
- * This is required to be a non-zero positive value.
- * @param[in] tensor_shape It specifies the size for each dimension of the tensor at the 0th pyramid level, in number of elements
- * @param[in] format The format of all tensors in the pyramid
- */
- void init(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format);
- /** Return the number of pyramid levels
- *
- * @return The number of pyramid levels
- */
- size_t num_levels() const;
- /** Return the width of the 0th level tensor
- *
- * @return The width of the 0th level tensor
- */
- size_t width() const;
- /** Return the height of the 0th level tensor
- *
- * @return The height of the 0th level tensor
- */
- size_t height() const;
- /** Return the TensorShape of the 0th level tensor
- *
- * @return The TensorShape of the 0th level tensor
- */
- const TensorShape &tensor_shape() const;
- /** Return the image format of all tensors in the pyramid
- *
- * @return The image format
- */
- Format format() const;
- /** Return the scale factor of the pyramid
- *
- * @return Return the scale factor
- */
- float scale() const;
-
-private:
- size_t _num_levels;
- TensorShape _tensor_shape;
- Format _format;
- float _scale;
-};
-}
-#endif /*ARM_COMPUTE_PYRAMIDINFO_H */
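Reviewer note: the removed class described a mip-style chain of tensors, num_levels images each scaled by the given factor relative to the previous level. A hypothetical construction sketch against the API as it stood before this removal:

#include "arm_compute/core/PyramidInfo.h"

// A 3-level U8 pyramid: 640x480, then 320x240, then 160x120 (scale 0.5).
const arm_compute::PyramidInfo info(3, 0.5f, 640, 480, arm_compute::Format::U8);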
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 52ef149e9b..aecba3712e 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,15 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
-#define ARM_COMPUTE_QUANTIZATION_INFO_H
+#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
+#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
#include "arm_compute/core/Rounding.h"
-#include "utils/misc/Utility.h"
-#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/Utility.h"
+
+#include "support/ToolchainSupport.h"
-#include <cstddef>
-#include <type_traits>
#include <vector>
namespace arm_compute
@@ -43,8 +42,7 @@ using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value
struct UniformQuantizationInfo
{
/** Default constructor */
- UniformQuantizationInfo()
- : scale(0.f), offset(0)
+ UniformQuantizationInfo() : scale(0.f), offset(0)
{
}
/** Constructor
@@ -52,8 +50,7 @@ struct UniformQuantizationInfo
* @param[in] scale Quantization scale
* @param[in] offset Quantization offset
*/
- UniformQuantizationInfo(float scale, int32_t offset)
- : scale(scale), offset(offset)
+ UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset)
{
}
/** Checks if the scale and offset are both zero */
@@ -71,9 +68,7 @@ class QuantizationInfo
{
public:
/** Default constructor */
- QuantizationInfo() noexcept
- : _scale(),
- _offset()
+ QuantizationInfo() noexcept : _scale(), _offset()
{
}
/** Construct quantization info.
@@ -82,19 +77,19 @@ public:
*
* @param[in] scale Scale.
*/
- QuantizationInfo(float scale)
- : _scale(1, scale), _offset()
+ QuantizationInfo(float scale) : _scale(1, scale), _offset()
{
}
/** Construct quantization info.
*
* @note Used for asymmetric quantization
*
- * @param[in] scale Scale.
- * @param[in] offset Offset.
+ * @param[in] scale Scale.
+ * @param[in] offset Offset.
+ * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
*/
- QuantizationInfo(float scale, int offset)
- : _scale(1, scale), _offset(1, offset)
+ QuantizationInfo(float scale, int offset, bool is_dynamic = false)
+ : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic)
{
}
/** Construct quantization info.
@@ -103,19 +98,19 @@ public:
*
* @param[in] scale Scale.
*/
- QuantizationInfo(std::vector<float> scale)
- : _scale(scale), _offset()
+ QuantizationInfo(std::vector<float> scale) : _scale(scale), _offset()
{
}
/** Construct quantization info.
*
* @note Used for asymmetric per channel quantization
*
- * @param[in] scale Scale.
- * @param[in] offset Offset.
+ * @param[in] scale Scale.
+ * @param[in] offset Offset.
+ * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
*/
- QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset)
- : _scale(scale), _offset(offset)
+ QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false)
+ : _scale(scale), _offset(offset), _is_dynamic(is_dynamic)
{
}
/** Scale vector accessor
@@ -134,6 +129,14 @@ public:
{
return _offset;
}
+ /** is_dynamic accessor
+ *
+ * @return If true, the scale and offset may change, so operators will need to read them on every run
+ */
+ bool is_dynamic() const
+ {
+ return _is_dynamic;
+ }
/** Indicates whether this QuantizationInfo has valid settings or not
*
* @return True if this has invalid settings.
@@ -158,6 +161,8 @@ public:
private:
std::vector<float> _scale; /**< Vector containing scaling factors */
std::vector<int32_t> _offset; /**< Vector containing zero offsets */
+ bool _is_dynamic =
+        false; /**< If true, the scale and offset may change, so operators will need to read them on every run */
};
/** Check whether two quantization info are equal.
@@ -210,20 +215,39 @@ inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantiza
template <typename QUANTIZED_TYPE = uint8_t>
struct Qasymm8QuantizationHelper
{
- static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value
- || std::is_same<QUANTIZED_TYPE, int8_t>::value,
+ static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value || std::is_same<QUANTIZED_TYPE, int8_t>::value,
"quantized type should be either uint8_t or int8_t.");
/** Quantize a value given an 8-bit asymmetric quantization scheme
*
+ * @param[in] value Value to quantize
+ * @param[in] qinfo Quantization information to use for quantizing
+ *
+ * @return Quantized value
+ */
+ static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
+ {
+ ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
+ const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset;
+ return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
+ }
+
+ /** Quantize a value given an 8-bit asymmetric quantization scheme using a specific rounding policy
+ *
* @param[in] value Value to quantize
* @param[in] qinfo Quantization information to use for quantizing
- * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
+ * @param[in] rounding_policy Rounding policy to use
*
* @return Quantized value
*/
- static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+ static inline QUANTIZED_TYPE
+ quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
{
+ if (rounding_policy == RoundingPolicy::TO_NEAREST_UP)
+ {
+ return quantize(value, qinfo);
+ }
+
ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
@@ -237,7 +261,8 @@ struct Qasymm8QuantizationHelper
*
* @return Quantized value
*/
- static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+ static inline QUANTIZED_TYPE
+ quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
const UniformQuantizationInfo uqinfo = qinfo.uniform();
ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0);
@@ -280,7 +305,8 @@ struct Qasymm8QuantizationHelper
* @return Quantized value
*/
template <typename INFO_TYPE>
-inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline uint8_t
+quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy);
}
@@ -294,7 +320,9 @@ inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPol
* @return Quantized value
*/
template <typename INFO_TYPE>
-inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline int8_t quantize_qasymm8_signed(float value,
+ const INFO_TYPE &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy);
}
@@ -416,6 +444,19 @@ inline float dequantize(uint16_t value, float scale, int32_t offset)
return (static_cast<int>(value) - offset) * scale;
}
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] scale Scale to use for dequantization
+ * @param[in] offset Zero-offset to use for dequantization
+ *
+ * @return Dequantized value
+ */
+inline float dequantize(int32_t value, float scale, int32_t offset)
+{
+ return (static_cast<int>(value) - offset) * scale;
+}
+
/** Quantize a value given a 16-bit symmetric quantization scheme
*
* @param[in] value Value to quantize
@@ -424,7 +465,9 @@ inline float dequantize(uint16_t value, float scale, int32_t offset)
*
* @return Quantized value
*/
-inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline int16_t quantize_qsymm16(float value,
+ const UniformQuantizationInfo &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
int quantized = arm_compute::round(value / qinfo.scale, rounding_policy);
quantized = arm_compute::utility::clamp<int, int16_t>(quantized);
@@ -475,7 +518,9 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
*
* @return Quantized value
*/
-inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline uint16_t quantize_qasymm16(float value,
+ const UniformQuantizationInfo &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
quantized = arm_compute::utility::clamp<int, uint16_t>(quantized);
@@ -518,6 +563,31 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
return dequantize_qasymm16(value, qinfo.uniform());
}
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo)
+{
+ return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
+}
+
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+
+inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo)
+{
+ return dequantize_s32(value, qinfo.uniform());
+}
+
/*
* In case of requantization of a quantized input tensor to an output tensor with another quantization
* instead of applying dequantization and then quantization functions, we just compute a new scale and
@@ -548,7 +618,8 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
* z_n = - z_i * s_i / s_o + z_o
*
*/
-inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out)
+inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in,
+ const UniformQuantizationInfo &uqinfo_out)
{
float scale_to_apply = uqinfo_out.scale;
int32_t offset_to_apply = uqinfo_out.offset;
@@ -562,4 +633,4 @@ inline UniformQuantizationInfo compute_requantization_scale_offset(const Uniform
}
} // namespace arm_compute
-#endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
+#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
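Reviewer note: the helpers in this header implement the usual affine scheme q = round(x / scale) + offset, clamped to the quantized type's range, and dequantize inverts it as x' = (q - offset) * scale. A worked round trip, assuming the dequantize_qasymm8 overload that lives elsewhere in this header:

#include "arm_compute/core/QuantizationInfo.h"
#include <cstdint>

using namespace arm_compute;

const UniformQuantizationInfo qinfo(0.5f, 10);   // scale 0.5, zero point 10
const uint8_t q = quantize_qasymm8(2.0f, qinfo); // round(2 / 0.5) + 10 == 14
const float   x = dequantize_qasymm8(q, qinfo);  // (14 - 10) * 0.5 == 2.0f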
diff --git a/arm_compute/core/Rounding.h b/arm_compute/core/Rounding.h
index 68d742907b..30a5a0fe9d 100644
--- a/arm_compute/core/Rounding.h
+++ b/arm_compute/core/Rounding.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,5 +42,5 @@ enum class RoundingPolicy
* @return Rounded value of the argument x.
*/
int round(float x, RoundingPolicy rounding_policy);
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ROUNDING_H */
diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h
index 722d7450f6..672b392050 100644
--- a/arm_compute/core/Size2D.h
+++ b/arm_compute/core/Size2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,8 +41,7 @@ public:
* @param[in] w Width of the image or rectangle
* @param[in] h Height of the image or rectangle
*/
- Size2D(size_t w, size_t h)
- : width(w), height(h)
+ Size2D(size_t w, size_t h) noexcept : width(w), height(h)
{
}
/** The area of the image or rectangle calculated as (width * height)
@@ -89,5 +88,5 @@ public:
size_t width = {}; /**< Width of the image region or rectangle */
size_t height = {}; /**< Height of the image region or rectangle */
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_SIZE2D_H */
diff --git a/arm_compute/core/Size3D.h b/arm_compute/core/Size3D.h
new file mode 100644
index 0000000000..e2dc6fe012
--- /dev/null
+++ b/arm_compute/core/Size3D.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_SIZE3D_H
+#define ARM_COMPUTE_SIZE3D_H
+
+#include <string>
+
+namespace arm_compute
+{
+/** Class for specifying the size of a 3D shape or object */
+class Size3D
+{
+public:
+ /** Default constructor */
+ Size3D() = default;
+ /** Constructor. Initializes "width", "height" and "depth" respectively with "w", "h" and "d"
+ *
+ * @param[in] w Width of the 3D shape or object
+ * @param[in] h Height of the 3D shape or object
+ * @param[in] d Depth of the 3D shape or object
+ */
+ Size3D(size_t w, size_t h, size_t d) noexcept : width(w), height(h), depth(d)
+ {
+ }
+
+ /** Convert the values stored to string
+ *
+ * @return string of (width x height x depth).
+ */
+ std::string to_string() const;
+
+ /** Semantic accessor for width as x.
+ *
+ * @return x.
+ */
+ size_t x() const
+ {
+ return width;
+ }
+
+ /** Semantic accessor for height as y.
+ *
+ * @return y.
+ */
+ size_t y() const
+ {
+ return height;
+ }
+
+ /** Semantic accessor for depth as z.
+ *
+ * @return z.
+ */
+ size_t z() const
+ {
+ return depth;
+ }
+
+ bool operator!=(const Size3D &other) const
+ {
+ return !(*this == other);
+ }
+
+ bool operator==(const Size3D &other) const
+ {
+ return (width == other.width) && (height == other.height) && (depth == other.depth);
+ }
+
+public:
+ size_t width = {}; /**< Width of the 3D shape or object */
+ size_t height = {}; /**< Height of the 3D shape or object */
+ size_t depth = {}; /**< Depth of the 3D shape or object */
+};
+
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_SIZE3D_H */
diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h
index 6c89185a1f..6b261becc0 100644
--- a/arm_compute/core/Steps.h
+++ b/arm_compute/core/Steps.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,8 +45,7 @@ public:
* @param[in] steps Values to initialize the steps.
*/
template <typename... Ts>
- Steps(Ts... steps)
- : Dimensions{ steps... }
+ Steps(Ts... steps) : Dimensions{steps...}
{
// Initialize empty dimensions to 1
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
@@ -62,5 +61,5 @@ public:
/** Default destructor */
~Steps() = default;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_STEPS_H*/
diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h
index a2a73377ea..627b219987 100644
--- a/arm_compute/core/Strides.h
+++ b/arm_compute/core/Strides.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include <algorithm>
#include <array>
#include <cstddef>
+#include <cstdint>
namespace arm_compute
{
@@ -42,8 +43,7 @@ public:
* @param[in] strides Values to initialize the strides.
*/
template <typename... Ts>
- constexpr Strides(Ts... strides)
- : Dimensions{ strides... }
+ constexpr Strides(Ts... strides) : Dimensions{strides...}
{
}
/** Allow instances of this class to be copy constructed */
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
index bcb570ae7f..7a3ee2cfd0 100644
--- a/arm_compute/core/SubTensorInfo.h
+++ b/arm_compute/core/SubTensorInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,12 @@
#ifndef ARM_COMPUTE_SUBTENSORINFO_H
#define ARM_COMPUTE_SUBTENSORINFO_H
-#include "arm_compute/core/ITensorInfo.h"
-
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Validate.h"
#include <cstddef>
#include <memory>
@@ -74,7 +72,7 @@ public:
// Inherited methods overridden:
std::unique_ptr<ITensorInfo> clone() const override;
- ITensorInfo &set_data_type(DataType data_type) override
+ ITensorInfo &set_data_type(DataType data_type) override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
_parent->set_data_type(data_type);
@@ -99,6 +97,7 @@ public:
return *this;
};
ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
+ ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override;
ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
@@ -116,7 +115,13 @@ public:
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->auto_padding();
};
+
+ ITensorInfo &set_lock_paddings(bool flag) override;
+
+ bool lock_paddings() const override;
+
bool extend_padding(const PaddingSize &padding) override;
+
size_t dimension(size_t index) const override
{
return _tensor_shape[index];
@@ -137,7 +142,7 @@ public:
return _parent->offset_element_in_bytes(_coords);
}
int32_t offset_element_in_bytes(const Coordinates &pos) const override;
- size_t element_size() const override
+ size_t element_size() const override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->element_size();
@@ -156,6 +161,11 @@ public:
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _tensor_shape;
}
+ const TensorDimsState &tensor_dims_state() const override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ return _dims_state;
+ }
DataType data_type() const override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
@@ -191,16 +201,21 @@ public:
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->is_dynamic();
}
+ bool are_values_constant() const override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ return _parent->are_values_constant();
+ }
ITensorInfo &set_is_resizable(bool is_resizable) override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
_parent->set_is_resizable(is_resizable);
return *this;
}
- ITensorInfo &set_is_dynamic(bool is_dynamic) override
+ ITensorInfo &set_are_values_constant(bool are_values_constant) override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
- _parent->set_is_dynamic(is_dynamic);
+ _parent->set_are_values_constant(are_values_constant);
return *this;
}
ValidRegion valid_region() const override
@@ -211,7 +226,7 @@ public:
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
// Check if subtensor is valid if parent is configured
- if(_parent->tensor_shape().total_size() != 0)
+ if (_parent->tensor_shape().total_size() != 0)
{
ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region);
}
@@ -227,13 +242,26 @@ public:
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->data_layout();
}
+ ITensorInfo::Id id() const override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ return _parent->id();
+ }
+ ITensorInfo &set_id(ITensorInfo::Id id) override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ _parent->set_id(id);
+ return *this;
+ }
private:
- ITensorInfo *_parent;
- TensorShape _tensor_shape;
- Coordinates _coords;
- ValidRegion _valid_region;
- bool _extend_parent;
+ ITensorInfo *_parent;
+ TensorShape _tensor_shape;
+ TensorDimsState _dims_state;
+ Coordinates _coords;
+ ValidRegion _valid_region;
+ bool _extend_parent;
+ bool _lock_paddings;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_SUBTENSORINFO_H */
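A minimal sketch of how the SubTensorInfo additions above behave, assuming the usual SubTensorInfo(parent, shape, coords) constructor; shapes and values are illustrative:

    #include "arm_compute/core/SubTensorInfo.h"
    #include "arm_compute/core/TensorInfo.h"

    using namespace arm_compute;

    void subtensor_metadata_sketch()
    {
        TensorInfo    parent(TensorShape(16U, 16U), 1, DataType::F32);
        SubTensorInfo sub(&parent, TensorShape(8U, 8U), Coordinates(0, 0));

        sub.set_lock_paddings(true);          // _lock_paddings lives on the sub-tensor itself
        const bool locked = sub.lock_paddings();

        // Per the overrides above, these forward to the parent tensor info.
        const bool            constant = sub.are_values_constant();
        const ITensorInfo::Id id       = sub.id();
        (void)locked; (void)constant; (void)id;
    }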
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index 68570d58db..b18f750427 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,23 +24,19 @@
#ifndef ARM_COMPUTE_TENSORINFO_H
#define ARM_COMPUTE_TENSORINFO_H
-#include "arm_compute/core/ITensorInfo.h"
-
-#include "ITensorInfo.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
+#include "ITensorInfo.h"
#include <cstddef>
#include <memory>
namespace arm_compute
{
-class HOGInfo;
-
/** Store the tensor's metadata */
class TensorInfo final : public ITensorInfo
{
@@ -52,7 +48,7 @@ public:
/** Allow instances of this class to be copy constructed */
TensorInfo(const ITensorInfo &info);
/** Allow instances of this class to be copy constructed */
- TensorInfo(const TensorInfo &) = default;
+ TensorInfo(const TensorInfo &);
/** Allow instances of this class to be copied */
TensorInfo &operator=(const TensorInfo &) = default;
/** Allow instances of this class to be move constructed */
@@ -115,15 +111,10 @@ public:
* @param[in] data_type Data type to use for each tensor element
* @param[in] quantization_info The quantization settings for the tensor data.
*/
- TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info);
-
- /** Constructor
- *
- * @param[in] hog_info HOG's metadata used to allocate normalized HOG space
- * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on
- * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on
- */
- TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height);
+ TensorInfo(const TensorShape &tensor_shape,
+ size_t num_channels,
+ DataType data_type,
+ QuantizationInfo quantization_info);
/** Initialize the tensor info with just a format.
*
@@ -147,7 +138,11 @@ public:
* @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element).
*/
- void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes);
+ void init(const TensorShape &tensor_shape,
+ Format format,
+ const Strides &strides_in_bytes,
+ size_t offset_first_element_in_bytes,
+ size_t total_size_in_bytes);
/** Initialize the tensor info with just a format.
*
@@ -175,15 +170,12 @@ public:
* @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element).
*/
- void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes,
- size_t total_size_in_bytes);
- /** Initialize the metadata structure for the given HOG's metadata
- *
- * @param[in] hog_info HOG's metadata used to allocate normalized HOG space
- * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on
- * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on
- */
- void init(const HOGInfo &hog_info, unsigned int width, unsigned int height);
+ void init(const TensorShape &tensor_shape,
+ size_t num_channels,
+ DataType data_type,
+ const Strides &strides_in_bytes,
+ size_t offset_first_element_in_bytes,
+ size_t total_size_in_bytes);
/** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated)
*
* @note The padding used by this method is really conservative so that the tensor can be used for most functions.
@@ -206,30 +198,22 @@ public:
* @return Total allocation size including padding in bytes.
*/
size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type);
- /** Initialize the metadata structure for the given HOG's metadata
- *
- * @note init_auto_padding will be used for the tensor initialization.
- *
- * @param[in] hog_info HOG's metadata used to allocate normalized HOG space
- * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on
- * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on
- *
- * @return Total allocation size including padding in bytes.
- */
- size_t init_auto_padding(const HOGInfo &hog_info, unsigned int width, unsigned int height);
// Inherited methods overridden:
std::unique_ptr<ITensorInfo> clone() const override;
- ITensorInfo &set_data_type(DataType data_type) override;
- ITensorInfo &set_num_channels(int num_channels) override;
- ITensorInfo &set_format(Format format) override;
- ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
- ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override;
- ITensorInfo &set_data_layout(const DataLayout &data_layout) override;
- ITensorInfo &reset_padding() override;
- bool auto_padding() override;
- bool extend_padding(const PaddingSize &padding) override;
- size_t dimension(size_t index) const override
+ ITensorInfo &set_data_type(DataType data_type) override;
+ ITensorInfo &set_num_channels(int num_channels) override;
+ ITensorInfo &set_format(Format format) override;
+ ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
+ ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override;
+ ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override;
+ ITensorInfo &set_data_layout(const DataLayout &data_layout) override;
+ ITensorInfo &reset_padding() override;
+ bool auto_padding() override;
+ ITensorInfo &set_lock_paddings(bool flag) override;
+ bool lock_paddings() const override;
+ bool extend_padding(const PaddingSize &padding) override;
+ size_t dimension(size_t index) const override
{
return _tensor_shape[index];
}
@@ -246,7 +230,7 @@ public:
return _offset_first_element_in_bytes;
}
int32_t offset_element_in_bytes(const Coordinates &pos) const override;
- size_t element_size() const override
+ size_t element_size() const override
{
return data_size_from_type(_data_type) * _num_channels;
}
@@ -262,6 +246,10 @@ public:
{
return _tensor_shape;
}
+ const TensorDimsState &tensor_dims_state() const override
+ {
+ return _dims_state;
+ }
DataType data_type() const override
{
return _data_type;
@@ -288,16 +276,16 @@ public:
}
bool is_dynamic() const override
{
- return _is_dynamic;
+ return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) !=
+ std::cend(_dims_state);
}
- ITensorInfo &set_is_resizable(bool is_resizable) override
+ bool are_values_constant() const override
{
- _is_resizable = is_resizable;
- return *this;
+ return _are_values_constant;
}
- ITensorInfo &set_is_dynamic(bool is_dynamic) override
+ ITensorInfo &set_is_resizable(bool is_resizable) override
{
- _is_dynamic = is_dynamic;
+ _is_resizable = is_resizable;
return *this;
}
ValidRegion valid_region() const override
@@ -316,6 +304,21 @@ public:
{
return _data_layout;
}
+ ITensorInfo &set_are_values_constant(bool are_values_constant) override
+ {
+ _are_values_constant = are_values_constant;
+ return *this;
+ }
+ ITensorInfo::Id id() const override
+ {
+ return _id;
+ }
+ ITensorInfo &set_id(ITensorInfo::Id id) override
+ {
+ _id = id;
+ return *this;
+ }
+ inline friend bool operator==(const TensorInfo &lhs, const TensorInfo &rhs);
private:
/** Calculates strides, offset and total size resulting from the specified padding around the XY plane.
@@ -329,14 +332,37 @@ private:
Strides _strides_in_bytes;
size_t _num_channels;
TensorShape _tensor_shape;
+ TensorDimsState _dims_state;
DataType _data_type;
Format _format;
bool _is_resizable;
- bool _is_dynamic;
ValidRegion _valid_region;
PaddingSize _padding;
QuantizationInfo _quantization_info;
DataLayout _data_layout;
+ bool _are_values_constant;
+ ITensorInfo::Id _id;
+ bool _lock_paddings;
};
+
+/** Check whether two tensor info are equal.
+ *
+ * @param[in] lhs LHS tensor info.
+ * @param[in] rhs RHS tensor info.
+ *
+ * @return True if the given tensor infos are the same.
+ */
+inline bool operator==(const TensorInfo &lhs, const TensorInfo &rhs)
+{
+ return (lhs._total_size == rhs._total_size) &&
+ (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) &&
+ (lhs._strides_in_bytes == rhs._strides_in_bytes) && (lhs._num_channels == rhs._num_channels) &&
+ (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) &&
+ (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format) &&
+ (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) &&
+ (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info) &&
+ (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant) &&
+ (lhs._id == rhs._id);
+}
} // namespace arm_compute
#endif /*ARM_COMPUTE_TENSORINFO_H */
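The new operator== and the dims-state plumbing above can be exercised as below; a minimal sketch, assuming TensorDimsState is the vector-of-int32 alias searched by the std::find() in is_dynamic() and that get_static_state_value() exists alongside the diffed get_dynamic_state_value():

    #include "arm_compute/core/TensorInfo.h"

    using namespace arm_compute;

    void tensor_info_sketch()
    {
        TensorInfo a(TensorShape(8U, 4U), 1, DataType::F16);
        TensorInfo b(TensorShape(8U, 4U), 1, DataType::F16);
        const bool same = (a == b); // memberwise, including _dims_state and _id

        // Marking any dimension dynamic is exactly what is_dynamic() now detects.
        ITensorInfo::TensorDimsState state(a.num_dimensions(), ITensorInfo::get_static_state_value());
        state[0] = ITensorInfo::get_dynamic_state_value();
        a.set_tensor_dims_state(state);
        const bool dynamic = a.is_dynamic(); // true
        (void)same; (void)dynamic;
    }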
diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h
index 57d8f6cf63..c1707e262f 100644
--- a/arm_compute/core/TensorShape.h
+++ b/arm_compute/core/TensorShape.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@
namespace arm_compute
{
/** Shape of a tensor */
-class TensorShape : public Dimensions<uint32_t>
+class TensorShape : public Dimensions<size_t>
{
public:
/** Constructor to initialize the tensor shape.
@@ -44,11 +44,10 @@ public:
* @param[in] dims Values to initialize the dimensions.
*/
template <typename... Ts>
- TensorShape(Ts... dims)
- : Dimensions{ dims... }
+ TensorShape(Ts... dims) : Dimensions{dims...}
{
// Initialize unspecified dimensions to 1
- if(_num_dimensions > 0)
+ if (_num_dimensions > 0)
{
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
}
@@ -71,14 +70,15 @@ public:
*
* @param[in] dimension Dimension for which the value is set.
* @param[in] value Value to be set for the dimension.
- * @param[in] apply_dim_correction Flag to state whether apply dimension correction after setting one dimension. E.g. when permuting NCHW -> NHWC, 1x1x2 would become 2x1x1, but _num_dimensions should be 3 rather than 1.
+     * @param[in] apply_dim_correction (Optional) Flag to state whether to apply dimension correction after setting one dimension. E.g. when permuting NCHW -> NHWC, 1x1x2 would become 2x1x1, but _num_dimensions should be 3 rather than 1.
+ * @param[in] increase_dim_unit (Optional) Set to true if new unit dimensions increase the number of dimensions of the shape.
*
* @return *this.
*/
- TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true)
+ TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true)
{
// Clear entire shape if one dimension is zero
- if(value == 0)
+ if (value == 0)
{
_num_dimensions = 0;
std::fill(_id.begin(), _id.end(), 0);
@@ -90,10 +90,10 @@ public:
// Set the specified dimension and increase the number of dimensions if
// necessary
- Dimensions::set(dimension, value);
+ Dimensions::set(dimension, value, increase_dim_unit);
// Correct number dimensions to ignore trailing dimensions of size 1
- if(apply_dim_correction)
+ if (apply_dim_correction)
{
apply_dimension_correction();
}
@@ -105,9 +105,10 @@ public:
*
* @note The upper dimensions of the tensor shape will be shifted down by 1
*
- * @param[in] n Dimension to remove
+ * @param[in] n Dimension to remove
+     * @param[in] apply_dim_correction (Optional) Flag to state whether to apply dimension correction (removing trailing dimensions with size of 1) after removing a dimension.
*/
- void remove_dimension(size_t n)
+ void remove_dimension(size_t n, bool apply_dim_correction = true)
{
ARM_COMPUTE_ERROR_ON(_num_dimensions < 1);
ARM_COMPUTE_ERROR_ON(n >= _num_dimensions);
@@ -121,7 +122,10 @@ public:
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
// Correct number dimensions to ignore trailing dimensions of size 1
- apply_dimension_correction();
+ if (apply_dim_correction)
+ {
+ apply_dimension_correction();
+ }
}
/** Collapse the first n dimensions.
@@ -207,26 +211,26 @@ public:
* @return The broadcasted shape or an empty shape if the shapes are not broadcast compatible.
*/
template <typename... Shapes>
- static TensorShape broadcast_shape(const Shapes &... shapes)
+ static TensorShape broadcast_shape(const Shapes &...shapes)
{
TensorShape bc_shape;
- auto broadcast = [&bc_shape](const TensorShape & other)
+ auto broadcast = [&bc_shape](const TensorShape &other)
{
- if(bc_shape.num_dimensions() == 0)
+ if (bc_shape.num_dimensions() == 0)
{
bc_shape = other;
}
- else if(other.num_dimensions() != 0)
+ else if (other.num_dimensions() != 0)
{
- for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
+ for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
{
const size_t dim_min = std::min(bc_shape[d], other[d]);
const size_t dim_max = std::max(bc_shape[d], other[d]);
- if((dim_min != 1) && (dim_min != dim_max))
+ if ((dim_min != 1) && (dim_min != dim_max))
{
- bc_shape = TensorShape{ 0U };
+ bc_shape = TensorShape{0U};
break;
}
@@ -244,9 +248,9 @@ private:
/** Remove trailing dimensions of size 1 from the reported number of dimensions. */
void apply_dimension_correction()
{
- for(int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i)
+ for (int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i)
{
- if(_id[i] == 1)
+ if (_id[i] == 1)
{
--_num_dimensions;
}
@@ -257,5 +261,5 @@ private:
}
}
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_TENSORSHAPE_H*/
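A short sketch of the extended TensorShape API above (set() with the new flags, remove_dimension() with optional correction, and broadcast_shape()); dimension values are illustrative:

    #include "arm_compute/core/TensorShape.h"

    using namespace arm_compute;

    void tensor_shape_sketch()
    {
        TensorShape s(2U, 2U);                       // num_dimensions() == 2

        s.set(2, 1);                                 // trailing 1 trimmed again: still 2
        s.set(2, 1, /*apply_dim_correction=*/false); // unit dimension kept: now 3

        s.remove_dimension(2, /*apply_dim_correction=*/false); // back to 2, no extra trimming

        // Incompatible shapes yield an empty shape; compatible ones broadcast per dimension.
        const TensorShape bc = TensorShape::broadcast_shape(TensorShape(4U, 1U), TensorShape(1U, 3U)); // 4x3
        (void)bc;
    }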
diff --git a/arm_compute/core/TracePoint.h b/arm_compute/core/TracePoint.h
deleted file mode 100644
index 6951d6d5ef..0000000000
--- a/arm_compute/core/TracePoint.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TRACEPOINT_H
-#define ARM_COMPUTE_TRACEPOINT_H
-
-#include <string>
-#include <type_traits>
-#include <vector>
-
-namespace arm_compute
-{
-#ifdef ARM_COMPUTE_TRACING_ENABLED
-#define CREATE_TRACEPOINT(...) TracePoint __tp(__VA_ARGS__)
-
-/** Class used to dump configuration values in functions and kernels */
-class TracePoint final
-{
-public:
- /** Layer types */
- enum class Layer
- {
- CORE,
- RUNTIME
- };
- /** struct describing the arguments for a tracepoint */
- struct Args final
- {
- std::vector<std::string> args{};
- };
- /** Constructor
- *
- * @param[in] source type of layer for the tracepoint
- * @param[in] class_name the name of the class creating the tracepoint
- * @param[in] object a pointer to the actual object owning the tracepoint
- * @param[in] args a struct describing all the arguments used in the call to the configure() method
- *
- */
- TracePoint(Layer source, const std::string &class_name, void *object, Args &&args);
- /** Destructor */
- ~TracePoint();
-
-private:
- static int g_depth; /**< current depth */
- int _depth; /**< tracepoint depth */
-};
-
-/** Operator to write an argument to a @ref TracePoint
- *
- * @param[in] tp Tracepoint to be used for writing
- * @param[in] arg Argument to be written in the tracepoint
- *
- * @return A referece to the updated tracepoint
- */
-template <typename T>
-TracePoint::Args &&operator<<(typename std::enable_if < !std::is_pointer<T>::value, TracePoint::Args >::type &&tp, const T &arg);
-template <typename T>
-TracePoint::Args &&operator<<(TracePoint::Args &&tp, const T *arg);
-
-#define CONST_REF_CLASS(type) \
- template <> \
- TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type &arg) \
- { \
- ARM_COMPUTE_UNUSED(tp); \
- tp.args.push_back(#type "(" + to_string(arg) + ")"); \
- return std::move(tp); \
- }
-
-#define CONST_PTR_ADDRESS(type) \
- template <> \
- TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type *arg) \
- { \
- ARM_COMPUTE_UNUSED(tp); \
- tp.args.push_back(#type "*(" + to_ptr_string(arg) + ")"); \
- return std::move(tp); \
- }
-#define CONST_PTR_CLASS(type) \
- template <> \
- TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type *arg) \
- { \
- ARM_COMPUTE_UNUSED(tp); \
- if(arg) \
- tp.args.push_back(#type "(" + to_string(*arg) + ")"); \
- else \
- tp.args.push_back(#type "( nullptr )"); \
- return std::move(tp); \
- }
-
-#define CONST_REF_SIMPLE(type) \
- template <> \
- TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type &arg) \
- { \
- ARM_COMPUTE_UNUSED(tp); \
- tp.args.push_back(#type "(" + support::cpp11::to_string(arg) + ")"); \
- return std::move(tp); \
- }
-
-#define TRACE_TO_STRING(type) \
- std::string to_string(const type &arg) \
- { \
- ARM_COMPUTE_UNUSED(arg); \
- return ""; \
- }
-#else /* ARM_COMPUTE_TRACING_ENABLED */
-#define CREATE_TRACEPOINT(...)
-#define CONST_REF_CLASS(type)
-#define CONST_PTR_ADDRESS(type)
-#define CONST_PTR_CLASS(type)
-#define CONST_REF_SIMPLE(type)
-#define TRACE_TO_STRING(type)
-#endif /* ARM_COMPUTE_TRACING_ENABLED */
-} //namespace arm_compute
-
-#endif /* ARM_COMPUTE_TRACEPOINT_H */
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 4e73edba4b..f2f60c150e 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,17 +21,52 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TYPES_H
-#define ARM_COMPUTE_TYPES_H
-
+#ifndef ACL_ARM_COMPUTE_CORE_TYPES_H
+#define ACL_ARM_COMPUTE_CORE_TYPES_H
+
+/** The following symbols have been moved to:
+ * half
+ * PermutationVector
+ * Format
+ * DataType
+ * DataLayout
+ * DataLayoutDimension
+ * PadStrideInfo
+ * WeightFormat
+ * Channel
+ * DimensionRoundingType
+ */
+#include "arm_compute/core/CoreTypes.h"
+/** The following symbols have been moved to:
+ * ActivationFunction
+ * ActivationLayerInfo
+ */
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+/** The following symbols have been moved to:
+ * ConvolutionInfo
+ */
+#include "arm_compute/function_info/ConvolutionInfo.h"
+/** The following symbols have been moved to:
+ * FullyConnectedLayerInfo
+ */
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
+/** The following symbols have been moved to:
+ * GEMMLowpOutputStageType
+ * GEMMLowpOutputStageInfo
+ * GEMMInfo
+ */
+#include "arm_compute/function_info/GEMMInfo.h"
+/** The following symbols have been moved to:
+ * MatMulInfo
+ */
#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Strides.h"
+#include "arm_compute/core/Size3D.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/Macros.h"
+#include "arm_compute/function_info/MatMulInfo.h"
+
#include "support/Bfloat16.h"
-#include "support/Half.h"
#include <cmath>
#include <cstddef>
@@ -42,62 +77,9 @@
namespace arm_compute
{
-/** 16-bit floating point type */
-using half = half_float::half;
-
-/** Permutation vector */
-using PermutationVector = Strides;
/** Bidirectional strides */
using BiStrides = Coordinates;
-/** Image colour formats */
-enum class Format
-{
- UNKNOWN, /**< Unknown image format */
- U8, /**< 1 channel, 1 U8 per channel */
- S16, /**< 1 channel, 1 S16 per channel */
- U16, /**< 1 channel, 1 U16 per channel */
- S32, /**< 1 channel, 1 S32 per channel */
- U32, /**< 1 channel, 1 U32 per channel */
- BFLOAT16, /**< 16-bit brain floating-point number */
- F16, /**< 1 channel, 1 F16 per channel */
- F32, /**< 1 channel, 1 F32 per channel */
- UV88, /**< 2 channel, 1 U8 per channel */
- RGB888, /**< 3 channels, 1 U8 per channel */
- RGBA8888, /**< 4 channels, 1 U8 per channel */
- YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
- YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
- NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
- NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
- IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
- UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */
-};
-
-/** Available data types */
-enum class DataType
-{
- UNKNOWN, /**< Unknown data type */
- U8, /**< unsigned 8-bit number */
- S8, /**< signed 8-bit number */
- QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */
- QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */
- QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */
- QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
- U16, /**< unsigned 16-bit number */
- S16, /**< signed 16-bit number */
- QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */
- QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */
- U32, /**< unsigned 32-bit number */
- S32, /**< signed 32-bit number */
- U64, /**< unsigned 64-bit number */
- S64, /**< signed 64-bit number */
- BFLOAT16, /**< 16-bit brain floating-point number */
- F16, /**< 16-bit floating-point number */
- F32, /**< 32-bit floating-point number */
- F64, /**< 64-bit floating-point number */
- SIZET /**< size_t */
-};
-
/** Available Sampling Policies */
enum class SamplingPolicy
{
@@ -105,42 +87,15 @@ enum class SamplingPolicy
TOP_LEFT /**< Samples are taken at pixel top left corner */
};
-/** Constant value of the border pixels when using BorderMode::CONSTANT */
-constexpr uint8_t CONSTANT_BORDER_VALUE = 199;
-
-/** Constant value used to indicate a half-scale pyramid */
-constexpr float SCALE_PYRAMID_HALF = 0.5f;
-
-/** Constant value used to indicate a ORB scaled pyramid */
-constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01;
-
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layouts */
-enum class DataLayout
-{
- UNKNOWN, /**< Unknown data layout */
- NCHW, /**< Num samples, channels, height, width */
- NHWC /**< Num samples, height, width, channels */
-};
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layout dimensions */
-enum class DataLayoutDimension
-{
- CHANNEL, /**< channel */
- HEIGHT, /**< height */
- WIDTH, /**< width */
- BATCHES /**< batches */
-};
-
/** Available ConvolutionMethod*/
enum class ConvolutionMethod
{
- GEMM, /**< Convolution using GEMM */
- DIRECT, /**< Direct convolution */
- WINOGRAD, /**< Convolution using Winograd */
- FFT /**< Convolution using FFT */
+ GEMM, /**< Convolution using GEMM */
+ GEMM_CONV2D, /**< Direct 2D GEMM convolution */
+ DIRECT, /**< Direct convolution */
+ INDIRECT, /**< Indirect convolution */
+ WINOGRAD, /**< Convolution using Winograd */
+ FFT /**< Convolution using FFT */
};
/** Available DepthwiseConvolutionFunction*/
@@ -153,8 +108,9 @@ enum class DepthwiseConvolutionFunction
/** Available DeconvolutionMethod*/
enum class DeconvolutionMethod
{
- GEMM, /**< Deconvolution using GEMM */
- DIRECT, /**< Direct deconvolution */
+ GEMM, /**< Deconvolution using GEMM */
+ DIRECT, /**< Direct deconvolution */
+ UPSCALE_CONV2D /**< Deconvolution with Upscaling */
};
/** Available FuseBatchNormalizationType*/
@@ -187,8 +143,7 @@ enum class ComparisonOperation
struct ValidRegion
{
/** Default constructor */
- ValidRegion()
- : anchor{}, shape{}
+ ValidRegion() : anchor{}, shape{}
{
}
@@ -209,8 +164,7 @@ struct ValidRegion
* @param[in] a_shape Shape of the valid region.
*
*/
- ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape)
- : anchor{ an_anchor }, shape{ a_shape }
+ ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) : anchor{an_anchor}, shape{a_shape}
{
anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions()));
}
@@ -223,7 +177,7 @@ struct ValidRegion
*
*/
ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions)
- : anchor{ an_anchor }, shape{ a_shape }
+ : anchor{an_anchor}, shape{a_shape}
{
ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions()));
anchor.set_num_dimensions(num_dimensions);
@@ -256,9 +210,22 @@ struct ValidRegion
return *this;
}
+ /** Check whether two valid regions are equal.
+ *
+ * @param[in] lhs LHS valid region
+ * @param[in] rhs RHS valid region
+ *
+ * @return True if the valid regions are the same.
+ */
+ inline friend bool operator==(const ValidRegion &lhs, const ValidRegion &rhs);
+
Coordinates anchor; /**< Anchor for the start of the valid region. */
TensorShape shape; /**< Shape of the valid region. */
};
+inline bool operator==(const ValidRegion &lhs, const ValidRegion &rhs)
+{
+ return (lhs.anchor == rhs.anchor) && (lhs.shape == rhs.shape);
+}
/** Methods available to handle borders */
enum class BorderMode
@@ -272,26 +239,24 @@ enum class BorderMode
struct BorderSize
{
/** Empty border, i.e. no border */
- constexpr BorderSize()
- : top{ 0 }, right{ 0 }, bottom{ 0 }, left{ 0 }
+ constexpr BorderSize() noexcept : top{0}, right{0}, bottom{0}, left{0}
{
}
/** Border with equal size around the 2D plane */
- explicit constexpr BorderSize(unsigned int size)
- : top{ size }, right{ size }, bottom{ size }, left{ size }
+ explicit constexpr BorderSize(unsigned int size) noexcept : top{size}, right{size}, bottom{size}, left{size}
{
}
/** Border with same size for top/bottom and left/right */
constexpr BorderSize(unsigned int top_bottom, unsigned int left_right)
- : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right }
+ : top{top_bottom}, right{left_right}, bottom{top_bottom}, left{left_right}
{
}
/** Border with different sizes */
constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left)
- : top{ top }, right{ right }, bottom{ bottom }, left{ left }
+ : top{top}, right{right}, bottom{bottom}, left{left}
{
}
@@ -337,6 +302,28 @@ struct BorderSize
return size;
}
+ /** Check equality with another BorderSize struct
+ *
+ * @param[in] rhs other struct to check against
+ *
+ * @return true if they are equal
+ */
+ bool operator==(const BorderSize &rhs) const
+ {
+ return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left);
+ }
+
+ /** Check non-equality with another BorderSize struct
+ *
+ * @param[in] rhs other struct to check against
+ *
+ * @return true if they are different
+ */
+ bool operator!=(const BorderSize &rhs) const
+ {
+ return !(*this == rhs);
+ }
+
/** Limit this border size.
*
* @param[in] limit Border size to limit this border size to.
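A minimal sketch of the two equality operators introduced above for ValidRegion and BorderSize; the values are illustrative:

    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    void equality_sketch()
    {
        const BorderSize a(1U);             // one element on every side
        const BorderSize b(1U, 1U, 1U, 1U); // the same border, spelled out
        const bool same_border = (a == b);  // true

        const ValidRegion r1(Coordinates(0, 0), TensorShape(8U, 8U));
        const ValidRegion r2(Coordinates(0, 0), TensorShape(8U, 8U));
        const bool same_region = (r1 == r2); // true: anchor and shape both match
        (void)same_border; (void)same_region;
    }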
@@ -358,7 +345,11 @@ struct BorderSize
/** Container for 2D padding size */
using PaddingSize = BorderSize;
-/** Policy to handle overflow */
+/** Policy to handle integer overflow
+ * @note: This is ignored by floating point operations where the overflow behavior adheres to the IEEE-754 standard
+ * which states that in case of overflow ±infinity is returned for the round-to-nearest modes (and follows the
+ * rounding rules for the directed rounding modes) by default.
+ */
enum class ConvertPolicy
{
WRAP, /**< Wrap around */
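To make the reworded ConvertPolicy note concrete, a plain-C++ illustration (not library code) of what WRAP versus SATURATE means for an 8-bit addition that overflows, assuming the usual clamping semantics of SATURATE:

    #include <algorithm>
    #include <cstdint>

    // 200 + 100 = 300 does not fit in a uint8_t.
    uint8_t add_u8(uint8_t a, uint8_t b, bool saturate)
    {
        const int32_t wide = int32_t(a) + int32_t(b);
        return saturate ? uint8_t(std::min<int32_t>(wide, 255)) // SATURATE -> 255
                        : uint8_t(wide & 0xFF);                 // WRAP     -> 44
    }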
@@ -370,7 +361,7 @@ enum class InterpolationPolicy
{
NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */
BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */
- AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
+ AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
};
/** Bilinear Interpolation method used by LKTracker */
@@ -380,53 +371,6 @@ enum class BilinearInterpolation
BILINEAR_SCHARR /**< Scharr method */
};
-/** Threshold mode */
-enum class ThresholdType
-{
- BINARY, /**< Threshold with one value */
- RANGE /**< Threshold with two values*/
-};
-
-/** Termination criteria */
-enum class Termination
-{
- TERM_CRITERIA_EPSILON, /**< Terminate when within epsilon of a threshold */
- TERM_CRITERIA_ITERATIONS, /**< Terminate after a maximum number of iterations */
- TERM_CRITERIA_BOTH /**< Terminate on whichever of the other conditions occurs first */
-};
-
-/** Magnitude calculation type. */
-enum class MagnitudeType
-{
- L1NORM, /**< L1 normalization type */
- L2NORM /**< L2 normalization type */
-};
-
-/** Phase calculation type.
- *
- * @note When PhaseType == SIGNED, each angle is mapped to the range 0 to 255 inclusive otherwise angles between 0 and 180
- */
-enum class PhaseType
-{
- SIGNED, /**< Angle range: [0, 360] */
- UNSIGNED /**< Angle range: [0, 180] */
-};
-
-/** Keypoint type */
-struct KeyPoint
-{
- int32_t x{ 0 }; /**< X coordinates */
- int32_t y{ 0 }; /**< Y coordinates */
- float strength{ 0.f }; /**< Strength of the point */
- float scale{ 0.f }; /**< Scale initialized to 0 by the corner detector */
- float orientation{ 0.f }; /**< Orientation initialized to 0 by the corner detector */
- int32_t tracking_status{ 0 }; /**< Status initialized to 1 by the corner detector, set to 0 when the point is lost */
- float error{ 0.f }; /**< Tracking error initialized to 0 by the corner detector */
-};
-
-/** Internal key point */
-using InternalKeypoint = std::tuple<float, float, float>; /* x,y,strength */
-
/** Rectangle type */
struct Rectangle
{
@@ -460,40 +404,6 @@ using PaddingList = std::vector<PaddingInfo>;
/** Information to produce a tiled version of a Tensor */
using Multiples = std::vector<uint32_t>;
-/** Available channels */
-enum class Channel
-{
- UNKNOWN, /** Unknown channel format */
- C0, /**< First channel (used by formats with unknown channel types). */
- C1, /**< Second channel (used by formats with unknown channel types). */
- C2, /**< Third channel (used by formats with unknown channel types). */
- C3, /**< Fourth channel (used by formats with unknown channel types). */
- R, /**< Red channel. */
- G, /**< Green channel. */
- B, /**< Blue channel. */
- A, /**< Alpha channel. */
- Y, /**< Luma channel. */
- U, /**< Cb/U channel. */
- V /**< Cr/V/Value channel. */
-};
-
-/** Available matrix patterns */
-enum class MatrixPattern
-{
- BOX, /**< Box pattern matrix. */
- CROSS, /**< Cross pattern matrix. */
- DISK, /**< Disk pattern matrix. */
- OTHER /**< Any other matrix pattern. */
-};
-
-/** Available non linear functions. */
-enum class NonLinearFilterFunction : unsigned
-{
- MEDIAN = 0, /**< Non linear median filter. */
- MIN = 1, /**< Non linear erode. */
- MAX = 2, /**< Non linear dilate. */
-};
-
/** Available reduction operations */
enum class ReductionOperation
{
@@ -523,13 +433,23 @@ enum class ArithmeticOperation
/** Available element wise unary operations */
enum class ElementWiseUnary
{
- RSQRT, /**< Reverse square root */
- EXP, /**< Exponential */
- NEG, /**< Negate */
- LOG, /**< Natural Logarithm */
- ABS, /**< Absolute value */
- SIN, /**< Sine */
- ROUND, /**< Round */
+ RSQRT, /**< Reverse square root */
+ EXP, /**< Exponential */
+ NEG, /**< Negate */
+ LOG, /**< Natural Logarithm */
+ ABS, /**< Absolute value */
+ SIN, /**< Sine */
+ ROUND, /**< Round */
+ LOGICAL_NOT, /**< Logical Not */
+};
+
+/** Available bitwise operations */
+enum class BitwiseOperation
+{
+ AND, /**< Bitwise AND operation */
+ NOT, /**< Bitwise NOT operation */
+ OR, /**< Bitwise OR operation */
+ XOR, /**< Bitwise XOR operation */
};
/** The normalization type used for the normalization layer */
@@ -540,14 +460,6 @@ enum class NormType
CROSS_MAP /**< Normalization applied cross maps */
};
-/** Normalization type for Histogram of Oriented Gradients (HOG) */
-enum class HOGNormType
-{
- L2_NORM = 1, /**< L2-norm */
- L2HYS_NORM = 2, /**< L2-norm followed by clipping */
- L1_NORM = 3 /**< L1 norm */
-};
-
/** Detection window used for the object detection. The detection window keeps the following information:
*
* -# Geometry of the rectangular window (x/y of top-left corner and width/height)
@@ -556,21 +468,12 @@ enum class HOGNormType
*/
struct DetectionWindow
{
- uint16_t x{ 0 }; /**< Top-left x coordinate */
- uint16_t y{ 0 }; /**< Top-left y coordinate */
- uint16_t width{ 0 }; /**< Width of the detection window */
- uint16_t height{ 0 }; /**< Height of the detection window */
- uint16_t idx_class{ 0 }; /**< Index of the class */
- float score{ 0.f }; /**< Confidence value for the detection window */
-};
-
-/** Dimension rounding type when down-scaling on CNNs
- * @note Used in pooling and convolution layer
- */
-enum class DimensionRoundingType
-{
- FLOOR, /**< Floor rounding */
- CEIL /**< Ceil rounding */
+ uint16_t x{0}; /**< Top-left x coordinate */
+ uint16_t y{0}; /**< Top-left y coordinate */
+ uint16_t width{0}; /**< Width of the detection window */
+ uint16_t height{0}; /**< Height of the detection window */
+ uint16_t idx_class{0}; /**< Index of the class */
+ float score{0.f}; /**< Confidence value for the detection window */
};
/** Available pooling types */
@@ -607,12 +510,28 @@ public:
* @param[in] im_width (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1
* @param[in] im_height (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. Defaults to 1
*/
- BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f,
- int detections = 100, bool soft_nms_enabled = false,
- NMSType soft_nms_method = NMSType::LINEAR,
- float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f)
- : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma),
- _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height)
+ BoxNMSLimitInfo(float score_thresh = 0.05f,
+ float nms = 0.3f,
+ int detections = 100,
+ bool soft_nms_enabled = false,
+ NMSType soft_nms_method = NMSType::LINEAR,
+ float soft_nms_sigma = 0.5f,
+ float soft_nms_min_score_thres = 0.001f,
+ bool suppress_size = false,
+ float min_size = 1.0f,
+ float im_width = 1.0f,
+ float im_height = 1.0f)
+ : _score_thresh(score_thresh),
+ _nms(nms),
+ _detections_per_im(detections),
+ _soft_nms_enabled(soft_nms_enabled),
+ _soft_nms_method(soft_nms_method),
+ _soft_nms_sigma(soft_nms_sigma),
+ _soft_nms_min_score_thres(soft_nms_min_score_thres),
+ _suppress_size(suppress_size),
+ _min_size(min_size),
+ _im_width(im_width),
+ _im_height(im_height)
{
}
/** Get the score threshold */
@@ -686,120 +605,42 @@ private:
};
/** Padding and stride information class */
-class PadStrideInfo
+/** Padding information for 2D operations like Conv2d */
+struct Padding2D
{
-public:
- /** Constructor
- *
- * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
- * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
- * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0.
- * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0.
- * @param[in] round (Optional) Dimensions rounding. Defaults to @ref FLOOR.
- */
- PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
- unsigned int pad_x = 0, unsigned int pad_y = 0,
- DimensionRoundingType round = DimensionRoundingType::FLOOR)
- : _stride(std::make_pair(stride_x, stride_y)),
- _pad_left(pad_x),
- _pad_top(pad_y),
- _pad_right(pad_x),
- _pad_bottom(pad_y),
- _round_type(round)
+ Padding2D() = default;
+ Padding2D(size_t left, size_t right, size_t top, size_t bottom) : left(left), right(right), top(top), bottom(bottom)
{
}
- /** Constructor
- *
- * @param[in] stride_x Stride, in elements, across x.
- * @param[in] stride_y Stride, in elements, across y.
- * @param[in] pad_left Padding across x on the left, in elements.
- * @param[in] pad_top Padding across y on the top, in elements.
- * @param[in] pad_right Padding across x on the right, in elements.
- * @param[in] pad_bottom Padding across y on the bottom, in elements.
- * @param[in] round Dimensions rounding.
- */
- PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
- unsigned int pad_left, unsigned int pad_right,
- unsigned int pad_top, unsigned int pad_bottom,
- DimensionRoundingType round)
- : _stride(std::make_pair(stride_x, stride_y)),
- _pad_left(pad_left),
- _pad_top(pad_top),
- _pad_right(pad_right),
- _pad_bottom(pad_bottom),
- _round_type(round)
- {
- }
- /** Get the stride.
- *
- * @return a pair: stride x, stride y.
- */
- std::pair<unsigned int, unsigned int> stride() const
- {
- return _stride;
- }
- /** Check whether the padding is symmetric.
- *
- * @return True if the padding is symmetric.
- */
- bool padding_is_symmetric() const
- {
- return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
- }
- /** Get the padding.
- *
- * @note This should only be used when the padding is symmetric.
- *
- * @return a pair: padding left/right, padding top/bottom
- */
- std::pair<unsigned int, unsigned int> pad() const
- {
- //this accessor should be used only when padding is symmetric
- ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
- return std::make_pair(_pad_left, _pad_top);
- }
+ size_t left = {0}; /**< Padding across the width dimension on the left, in elements. */
+ size_t right = {0}; /**< Padding across the width dimension on the right, in elements. */
+ size_t top = {0}; /**< Padding across the height dimension on the top, in elements. */
+ size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
+};
- /** Get the left padding */
- unsigned int pad_left() const
- {
- return _pad_left;
- }
- /** Get the right padding */
- unsigned int pad_right() const
- {
- return _pad_right;
- }
- /** Get the top padding */
- unsigned int pad_top() const
- {
- return _pad_top;
- }
- /** Get the bottom padding */
- unsigned int pad_bottom() const
+/** Padding information for 3D operations like Conv3d */
+struct Padding3D
+{
+ Padding3D() noexcept
{
- return _pad_bottom;
}
- /** Get the rounding type */
- DimensionRoundingType round() const
+ Padding3D(size_t pad_x, size_t pad_y, size_t pad_z)
+ : left(pad_x), right(pad_x), top(pad_y), bottom(pad_y), front(pad_z), back(pad_z)
{
- return _round_type;
}
- /** Check whether this has any padding */
- bool has_padding() const
+ Padding3D(size_t left, size_t right, size_t top, size_t bottom, size_t front, size_t back)
+ : left(left), right(right), top(top), bottom(bottom), front(front), back(back)
{
- return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
}
-private:
- std::pair<unsigned int, unsigned int> _stride;
- unsigned int _pad_left;
- unsigned int _pad_top;
- unsigned int _pad_right;
- unsigned int _pad_bottom;
-
- DimensionRoundingType _round_type;
+    size_t left = {0};   /**< Padding across the width dimension on the left, in elements. */
+    size_t right = {0};  /**< Padding across the width dimension on the right, in elements. */
+    size_t top = {0};    /**< Padding across the height dimension on the top, in elements. */
+    size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
+    size_t front = {0};  /**< Padding across the depth dimension on the front, in elements. */
+    size_t back = {0};   /**< Padding across the depth dimension on the back, in elements. */
};
/** PriorBox layer info */
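A minimal sketch of the two padding descriptors added above; the values are illustrative:

    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    void padding_sketch()
    {
        const Padding2D pad2d(/*left=*/1, /*right=*/1, /*top=*/2, /*bottom=*/2);

        // The 3-argument Padding3D form mirrors each value across its axis...
        const Padding3D sym(/*pad_x=*/1, /*pad_y=*/1, /*pad_z=*/0);
        // ...while the 6-argument form sets every face independently.
        const Padding3D asym(/*left=*/1, /*right=*/0, /*top=*/1, /*bottom=*/0, /*front=*/1, /*back=*/0);
        (void)pad2d; (void)sym; (void)asym;
    }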
@@ -831,9 +672,15 @@ public:
* @param[in] img_size (Optional) Image size.
* @param[in] steps (Optional) Step values.
*/
- PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false,
- const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {},
- const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } })
+ PriorBoxLayerInfo(const std::vector<float> &min_sizes,
+ const std::vector<float> &variances,
+ float offset,
+ bool flip = true,
+ bool clip = false,
+ const std::vector<float> &max_sizes = {},
+ const std::vector<float> &aspect_ratios = {},
+ const Coordinates2D &img_size = Coordinates2D{0, 0},
+ const std::array<float, 2> &steps = {{0.f, 0.f}})
: _min_sizes(min_sizes),
_variances(variances),
_offset(offset),
@@ -845,22 +692,22 @@ public:
_steps(steps)
{
_aspect_ratios.push_back(1.);
- for(unsigned int i = 0; i < aspect_ratios.size(); ++i)
+ for (unsigned int i = 0; i < aspect_ratios.size(); ++i)
{
float ar = aspect_ratios[i];
bool already_exist = false;
- for(auto ar_new : _aspect_ratios)
+ for (auto ar_new : _aspect_ratios)
{
- if(fabs(ar - ar_new) < 1e-6)
+ if (fabs(ar - ar_new) < 1e-6)
{
already_exist = true;
break;
}
}
- if(!already_exist)
+ if (!already_exist)
{
_aspect_ratios.push_back(ar);
- if(flip)
+ if (flip)
{
_aspect_ratios.push_back(1.f / ar);
}
@@ -914,14 +761,14 @@ public:
}
private:
- std::vector<float> _min_sizes;
- std::vector<float> _variances;
- float _offset;
- bool _flip;
- bool _clip;
- std::vector<float> _max_sizes;
- std::vector<float> _aspect_ratios;
- Coordinates2D _img_size;
+ std::vector<float> _min_sizes;
+ std::vector<float> _variances;
+ float _offset;
+ bool _flip;
+ bool _clip;
+ std::vector<float> _max_sizes;
+ std::vector<float> _aspect_ratios;
+ Coordinates2D _img_size;
std::array<float, 2> _steps;
};
@@ -972,8 +819,16 @@ public:
* @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false.
* @param[in] eta (Optional) Eta.
*/
- DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1,
- float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1)
+ DetectionOutputLayerInfo(int num_classes,
+ bool share_location,
+ DetectionOutputLayerCodeType code_type,
+ int keep_top_k,
+ float nms_threshold,
+ int top_k = -1,
+ int background_label_id = -1,
+ float confidence_threshold = std::numeric_limits<float>::lowest(),
+ bool variance_encoded_in_target = false,
+ float eta = 1)
: _num_classes(num_classes),
_share_location(share_location),
_code_type(code_type),
@@ -1087,8 +942,15 @@ public:
* @param[in] detection_per_class (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100.
* @param[in] dequantize_scores (Optional) If the scores need to be dequantized. Defaults to true.
*/
- DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes,
- std::array<float, 4> scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true)
+ DetectionPostProcessLayerInfo(unsigned int max_detections,
+ unsigned int max_classes_per_detection,
+ float nms_score_threshold,
+ float iou_threshold,
+ unsigned int num_classes,
+ std::array<float, 4> scales_values,
+ bool use_regular_nms = false,
+ unsigned int detection_per_class = 100,
+ bool dequantize_scores = true)
: _max_detections(max_detections),
_max_classes_per_detection(max_classes_per_detection),
_nms_score_threshold(nms_score_threshold),
@@ -1166,15 +1028,15 @@ public:
}
private:
- unsigned int _max_detections;
- unsigned int _max_classes_per_detection;
- float _nms_score_threshold;
- float _iou_threshold;
- unsigned int _num_classes;
+ unsigned int _max_detections;
+ unsigned int _max_classes_per_detection;
+ float _nms_score_threshold;
+ float _iou_threshold;
+ unsigned int _num_classes;
std::array<float, 4> _scales_values;
- bool _use_regular_nms;
- unsigned int _detection_per_class;
- bool _dequantize_scores;
+ bool _use_regular_nms;
+ unsigned int _detection_per_class;
+ bool _dequantize_scores;
};
/** Pooling Layer Information struct*/
@@ -1188,7 +1050,9 @@ struct PoolingLayerInfo
pad_stride_info(PadStrideInfo()),
exclude_padding(false),
is_global_pooling(false),
- fp_mixed_precision(false)
+ fp_mixed_precision(false),
+ use_inf_as_limit(true),
+ use_kernel_indices(false)
{
}
/** Constructor
@@ -1201,20 +1065,26 @@ struct PoolingLayerInfo
* True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+     * @param[in] use_inf_as_limit   (Optional) Use inf to represent the limits of the data type's range, instead of using the "lowest" property of the data type.
+     * @param[in] use_kernel_indices (Optional) Use kernel indices instead of source indices while computing the indices tensor.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
unsigned int pool_size,
DataLayout data_layout,
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
- bool fp_mixed_precision = false)
+ bool fp_mixed_precision = false,
+ bool use_inf_as_limit = true,
+ bool use_kernel_indices = false)
: pool_type(pool_type),
pool_size(Size2D(pool_size, pool_size)),
data_layout(data_layout),
pad_stride_info(pad_stride_info),
exclude_padding(exclude_padding),
is_global_pooling(false),
- fp_mixed_precision(fp_mixed_precision)
+ fp_mixed_precision(fp_mixed_precision),
+ use_inf_as_limit(use_inf_as_limit),
+ use_kernel_indices(use_kernel_indices)
{
}
@@ -1228,20 +1098,26 @@ struct PoolingLayerInfo
* True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+     * @param[in] use_inf_as_limit   (Optional) Use inf to represent the limits of the data type's range, instead of using the "lowest" property of the data type.
+     * @param[in] use_kernel_indices (Optional) Use kernel indices instead of source indices while computing the indices tensor.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
Size2D pool_size,
DataLayout data_layout,
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
- bool fp_mixed_precision = false)
+ bool fp_mixed_precision = false,
+ bool use_inf_as_limit = true,
+ bool use_kernel_indices = false)
: pool_type(pool_type),
pool_size(pool_size),
data_layout(data_layout),
pad_stride_info(pad_stride_info),
exclude_padding(exclude_padding),
is_global_pooling(false),
- fp_mixed_precision(fp_mixed_precision)
+ fp_mixed_precision(fp_mixed_precision),
+ use_inf_as_limit(use_inf_as_limit),
+ use_kernel_indices(use_kernel_indices)
{
}
@@ -1259,7 +1135,9 @@ struct PoolingLayerInfo
pad_stride_info(PadStrideInfo(1, 1, 0, 0)),
exclude_padding(false),
is_global_pooling(true),
- fp_mixed_precision(false)
+ fp_mixed_precision(false),
+ use_inf_as_limit(true),
+ use_kernel_indices(false)
{
}
@@ -1270,6 +1148,111 @@ struct PoolingLayerInfo
bool exclude_padding;
bool is_global_pooling;
bool fp_mixed_precision;
+ bool use_inf_as_limit;
+ bool use_kernel_indices;
+};
+
+/** Pooling Layer Information struct*/
+struct Pooling3dLayerInfo
+{
+    /** Default constructor */
+ Pooling3dLayerInfo() noexcept
+ : pool_type(PoolingType::MAX),
+ pool_size(Size3D()),
+ stride(Size3D()),
+ padding(Padding3D()),
+ exclude_padding(false),
+ is_global_pooling(false),
+ fp_mixed_precision(false),
+ round_type(DimensionRoundingType::FLOOR)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] pool_type Pooling type @ref PoolingType.
+ * @param[in] pool_size Pooling size, in elements, across x, y and z.
+ * @param[in] stride (Optional) stride information @ref Size3D
+ * @param[in] padding (Optional) padding information @ref Padding3D
+ * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations.
+ * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
+ * Defaults to false;
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] round_type (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR
+ */
+ explicit Pooling3dLayerInfo(PoolingType pool_type,
+ unsigned int pool_size,
+ Size3D stride = Size3D(1U, 1U, 1U),
+ Padding3D padding = Padding3D(),
+ bool exclude_padding = false,
+ bool fp_mixed_precision = false,
+ DimensionRoundingType round_type = DimensionRoundingType::FLOOR)
+ : pool_type(pool_type),
+ pool_size(Size3D(pool_size, pool_size, pool_size)),
+ stride(stride),
+ padding(padding),
+ exclude_padding(exclude_padding),
+ is_global_pooling(false),
+ fp_mixed_precision(fp_mixed_precision),
+ round_type(round_type)
+ {
+ }
+
+ /** Constructor
+ *
+ * @param[in] pool_type Pooling type @ref PoolingType.
+ * @param[in] pool_size Pooling size, in elements, across x, y and z.
+ * @param[in] stride (Optional) stride information @ref Size3D
+ * @param[in] padding (Optional) padding information @ref Padding3D
+ * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations.
+ * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
+ * Defaults to false;
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] round_type (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR
+ */
+ explicit Pooling3dLayerInfo(PoolingType pool_type,
+ Size3D pool_size,
+ Size3D stride = Size3D(1U, 1U, 1U),
+ Padding3D padding = Padding3D(),
+ bool exclude_padding = false,
+ bool fp_mixed_precision = false,
+ DimensionRoundingType round_type = DimensionRoundingType::FLOOR)
+ : pool_type(pool_type),
+ pool_size(pool_size),
+ stride(stride),
+ padding(padding),
+ exclude_padding(exclude_padding),
+ is_global_pooling(false),
+ fp_mixed_precision(fp_mixed_precision),
+ round_type(round_type)
+ {
+ }
+
+ /** Constructor
+ *
+ * @note This constructor is used for global pooling
+ *
+ * @param[in] pool_type Pooling type @ref PoolingType.
+ */
+ explicit Pooling3dLayerInfo(PoolingType pool_type)
+ : pool_type(pool_type),
+ pool_size(Size3D()),
+ stride(Size3D(1U, 1U, 1U)),
+ padding(Padding3D(0, 0, 0)),
+ exclude_padding(false),
+ is_global_pooling(true),
+ fp_mixed_precision(false),
+ round_type(DimensionRoundingType::FLOOR)
+ {
+ }
+
+ PoolingType pool_type;
+ Size3D pool_size;
+ Size3D stride;
+ Padding3D padding;
+ bool exclude_padding;
+ bool is_global_pooling;
+ bool fp_mixed_precision;
+ DimensionRoundingType round_type;
};
/** ROI Pooling Layer Information class */
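A minimal sketch of the new Pooling3dLayerInfo constructors above; parameter values are illustrative:

    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    void pooling3d_sketch()
    {
        // 3x3x3 average pooling, stride 2 on every axis, one element of symmetric
        // padding, with the padded elements excluded from the averaging area.
        const Pooling3dLayerInfo info(PoolingType::AVG,
                                      /*pool_size=*/3U,
                                      Size3D(2U, 2U, 2U),
                                      Padding3D(1, 1, 1),
                                      /*exclude_padding=*/true);

        // The single-argument form requests global pooling over the whole volume.
        const Pooling3dLayerInfo global_info(PoolingType::MAX);
        (void)info; (void)global_info;
    }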
@@ -1283,8 +1266,14 @@ public:
* @param[in] spatial_scale Spatial scale to be applied to the ROI coordinates and dimensions.
* @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims))
*/
- ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0)
- : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio)
+ ROIPoolingLayerInfo(unsigned int pooled_width,
+ unsigned int pooled_height,
+ float spatial_scale,
+ unsigned int sampling_ratio = 0)
+ : _pooled_width(pooled_width),
+ _pooled_height(pooled_height),
+ _spatial_scale(spatial_scale),
+ _sampling_ratio(sampling_ratio)
{
}
/** Get the pooled width of the layer */
@@ -1331,10 +1320,24 @@ public:
* @param[in] min_size (Optional)Size used to validate the anchors produced. Defaults to 16.
* @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4.
*/
- GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0,
+ GenerateProposalsInfo(float im_width,
+ float im_height,
+ float im_scale,
+ float spatial_scale = 1.0,
+ int pre_nms_topN = 6000,
+ int post_nms_topN = 300,
+ float nms_thres = 0.7,
+ float min_size = 16.0,
size_t values_per_roi = 4)
- : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres),
- _min_size(min_size), _values_per_roi(values_per_roi)
+ : _im_height(im_height),
+ _im_width(im_width),
+ _im_scale(im_scale),
+ _spatial_scale(spatial_scale),
+ _pre_nms_topN(pre_nms_topN),
+ _post_nms_topN(post_nms_topN),
+ _nms_thres(nms_thres),
+ _min_size(min_size),
+ _values_per_roi(values_per_roi)
{
}
@@ -1460,11 +1463,20 @@ public:
* @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. Defaults to false
* @param[in] bbox_xform_clip (Optional)Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16)
*/
- BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array<float, 4> weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords =
- false,
- float bbox_xform_clip =
- 4.135166556742356f)
- : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip)
+ BoundingBoxTransformInfo(float img_width,
+ float img_height,
+ float scale,
+ bool apply_scale = false,
+ const std::array<float, 4> weights = {{1.f, 1.f, 1.f, 1.f}},
+ bool correct_transform_coords = false,
+ float bbox_xform_clip = 4.135166556742356f)
+ : _img_width(img_width),
+ _img_height(img_height),
+ _scale(scale),
+ _apply_scale(apply_scale),
+ _correct_transform_coords(correct_transform_coords),
+ _weights(weights),
+ _bbox_xform_clip(bbox_xform_clip)
{
}
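// Editor's illustrative sketch, not part of the patch: transform info for an
// 800x600 image with the non-uniform delta weights (10, 10, 5, 5) often used
// by detection heads; the remaining parameters keep their defaults.
arm_compute::BoundingBoxTransformInfo bbox_info(800.f, 600.f, 1.f, false, {{10.f, 10.f, 5.f, 5.f}});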
@@ -1504,110 +1516,13 @@ public:
}
private:
- float _img_width;
- float _img_height;
- float _scale;
- bool _apply_scale;
- bool _correct_transform_coords;
+ float _img_width;
+ float _img_height;
+ float _scale;
+ bool _apply_scale;
+ bool _correct_transform_coords;
std::array<float, 4> _weights;
- float _bbox_xform_clip;
-};
-
-/** Activation Layer Information class */
-class ActivationLayerInfo
-{
-public:
- /** Available activation functions */
- enum class ActivationFunction
- {
- LOGISTIC, /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
- TANH, /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
- RELU, /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
- BOUNDED_RELU, /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
- LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
- LEAKY_RELU, /**< Leaky Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
- SOFT_RELU, /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
- ELU, /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
- ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */
- SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/
- SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
- LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */
- IDENTITY, /**< Identity ( \f$ f(x)= x \f$ ) */
- HARD_SWISH /**< Hard-swish ( \f$ f(x) = (x * relu6(x+3))/6 \f$ ) */
- };
-
- ActivationLayerInfo() = default;
- /** Default Constructor
- *
- * @param[in] f The activation function to use.
- * @param[in] a (Optional) The alpha parameter used by some activation functions
- * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
- * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH).
- */
- ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f)
- : _act(f), _a(a), _b(b), _enabled(true)
- {
- }
- /** Get the type of activation function */
- ActivationFunction activation() const
- {
- return _act;
- }
- /** Get the alpha value */
- float a() const
- {
- return _a;
- }
- /** Get the beta value */
- float b() const
- {
- return _b;
- }
- /** Check if initialised */
- bool enabled() const
- {
- return _enabled;
- }
-
-private:
- ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY };
- float _a = {};
- float _b = {};
- bool _enabled = { false };
-};
-
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
- DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
- bool transpose_weights{ true }; /**< Transpose weights if true. */
- bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */
- bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
- bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
- ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */
-
- /** Sets the weights trained data layout
- *
- * @param[in] layout Data layout that the weights were trained with
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
- {
- weights_trained_layout = layout;
- return *this;
- }
- /** Sets the transpose weights flag
- *
- * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
- {
- transpose_weights = should_transpose_weights;
- return *this;
- }
+ float _bbox_xform_clip;
};
/** Normalization Layer Information class */
@@ -1624,7 +1539,12 @@ public:
* @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not.
* Should be false to follow [Krichevksy 2012].
*/
- NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true)
+ NormalizationLayerInfo(NormType type,
+ uint32_t norm_size = 5,
+ float alpha = 0.0001f,
+ float beta = 0.5f,
+ float kappa = 1.f,
+ bool is_scaled = true)
: _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled)
{
}
@@ -1690,13 +1610,74 @@ private:
bool _is_scaled;
};
+class StridedSliceLayerInfo
+{
+public:
+ /** Default Constructor
+ *
+ * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ */
+ StridedSliceLayerInfo(int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0)
+ : _begin_mask(begin_mask), _end_mask(end_mask), _shrink_axis_mask(shrink_axis_mask)
+ {
+ }
+
+    /** Get the begin mask value */
+ int32_t begin_mask() const
+ {
+ return _begin_mask;
+ }
+
+    /** Get the end mask value */
+ int32_t end_mask() const
+ {
+ return _end_mask;
+ }
+
+    /** Get the shrink axis mask value */
+ int32_t shrink_axis_mask() const
+ {
+ return _shrink_axis_mask;
+ }
+
+private:
+ int32_t _begin_mask;
+ int32_t _end_mask;
+ int32_t _shrink_axis_mask;
+};
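// Editor's illustrative sketch, not part of the patch: each mask is a
// per-dimension bit field, so setting bit i of begin_mask/end_mask ignores
// starts[i]/ends[i], and bit i of shrink_axis_mask drops dimension i.
// Full range on dimension 0, shrink dimension 2:
arm_compute::StridedSliceLayerInfo slice_info(1 << 0 /* begin_mask */,
                                              1 << 0 /* end_mask */,
                                              1 << 2 /* shrink_axis_mask */);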
+
+// OHWIo<interleave_by>i<block_by>
+inline int interleave_by(const WeightFormat wf)
+{
+ return (static_cast<int>(wf) >> 8) & 0xFFF;
+}
+inline int block_by(const WeightFormat wf)
+{
+ return (static_cast<int>(wf) >> 20) & 0xF;
+}
+inline bool is_fixed_format(const WeightFormat &wf)
+{
+ return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY;
+}
+inline bool is_fixed_format_fast_math(const WeightFormat &wf)
+{
+ return (static_cast<int>(wf) >> 4) & 0x1;
+}
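// Editor's worked example, not part of the patch: per the shifts above, the
// enum packs the interleave factor in bits [8, 20), the block factor in bits
// [20, 24) and the fast-math flag in bit 4. A hypothetical OHWIo8i4 value of
// ((4 << 20) | (8 << 8)) therefore decodes as:
//   interleave_by(wf)             == 8      // (value >> 8)  & 0xFFF
//   block_by(wf)                  == 4      // (value >> 20) & 0xF
//   is_fixed_format_fast_math(wf) == false  // bit 4 clear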
+
/** Convolution Layer Weights Information class. This class stores the necessary information to compute convolution layer when the weights are already reshaped */
class WeightsInfo
{
public:
/** Default constructor */
WeightsInfo()
- : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false)
+ : _are_reshaped(false),
+ _kernel_width(0),
+ _kernel_height(0),
+ _num_kernels(0),
+ _retain_internal_weights(false),
+ _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
{
}
/** Constructor
@@ -1706,9 +1687,20 @@ public:
* @param[in] kernel_height Kernel height.
* @param[in] num_kernels Number of convolution kernels.
* @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false.
+     * @param[in] weight_format           (Optional) arm_compute::WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
*/
- WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false)
- : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights)
+ WeightsInfo(bool are_reshaped,
+ unsigned int kernel_width,
+ unsigned int kernel_height,
+ unsigned int num_kernels,
+ bool retain_internal_weights = false,
+ arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED)
+ : _are_reshaped(are_reshaped),
+ _kernel_width(kernel_width),
+ _kernel_height(kernel_height),
+ _num_kernels(num_kernels),
+ _retain_internal_weights(retain_internal_weights),
+ _weight_format(weight_format)
{
}
/** Flag which specifies if the weights tensor has been reshaped.
@@ -1739,22 +1731,40 @@ public:
{
return _retain_internal_weights;
}
+ arm_compute::WeightFormat weight_format() const
+ {
+ return _weight_format;
+ }
+ void set_weight_format(arm_compute::WeightFormat weight_format)
+ {
+ _weight_format = weight_format;
+ }
+
+ unsigned int kernel_width() const
+ {
+ return _kernel_width;
+ }
+ unsigned int kernel_height() const
+ {
+ return _kernel_height;
+ }
private:
- const bool _are_reshaped;
- const unsigned int _kernel_width;
- const unsigned int _kernel_height;
- const unsigned int _num_kernels;
- const bool _retain_internal_weights;
+ bool _are_reshaped;
+ unsigned int _kernel_width;
+ unsigned int _kernel_height;
+ unsigned int _num_kernels;
+ bool _retain_internal_weights;
+ arm_compute::WeightFormat _weight_format;
};
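// Editor's illustrative sketch, not part of the patch: describing 64 unreshaped
// 3x3 kernels and letting the backend choose a fixed weight format through the
// member introduced above.
void weights_info_example()
{
    arm_compute::WeightsInfo weights_info(false /* are_reshaped */, 3U, 3U, 64U);
    weights_info.set_weight_format(arm_compute::WeightFormat::ANY);
}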
/** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape.
*
- * The matrix A can only be reshaped through @ref CLGEMMReshapeLHSMatrixKernel or @ref NEGEMMInterleave4x4Kernel or @ref GCGEMMInterleave4x4Kernel
- * Note: Optionally just for @ref CLGEMMReshapeLHSMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block
+ * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref cpu::kernels::CpuGemmInterleave4x4Kernel
+ * Note: mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block, can optionally be set only for @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel
*
- * The matrix B can only be reshaped through @ref CLGEMMReshapeRHSMatrixKernel or @ref NEGEMMTranspose1xWKernel or @ref GCGEMMTranspose1xWKernel
- * Note: Optionally just for @ref CLGEMMReshapeRHSMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block
+ * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref cpu::kernels::CpuGemmTranspose1xWKernel
+ * Note: mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block, can optionally be set only for @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel
*
*/
class GEMMReshapeInfo final
@@ -1762,7 +1772,14 @@ class GEMMReshapeInfo final
public:
/** Default constructor */
GEMMReshapeInfo()
- : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false)
+ : _m(1),
+ _n(1),
+ _k(1),
+ _mult_transpose1xW_width(1),
+ _mult_interleave4x4_height(1),
+ _depth_output_gemm3d(0),
+ _reinterpret_input_as_3d(false),
+ _broadcast_bias(false)
{
}
/** Constructor
@@ -1778,9 +1795,22 @@ public:
* to perform 1x1 convolutions with the NHWC data layout)
* @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
*/
- GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false)
- : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias)
+ GEMMReshapeInfo(int m,
+ int n,
+ int k,
+ int mult_transpose1xW_width = 1,
+ int mult_interleave4x4_height = 1,
+ int depth_output_gemm3d = 0,
+ bool reinterpret_input_as_3d = false,
+ bool broadcast_bias = false)
+ : _m(m),
+ _n(n),
+ _k(k),
+ _mult_transpose1xW_width(mult_transpose1xW_width),
+ _mult_interleave4x4_height(mult_interleave4x4_height),
+ _depth_output_gemm3d(depth_output_gemm3d),
+ _reinterpret_input_as_3d(reinterpret_input_as_3d),
+ _broadcast_bias(broadcast_bias)
{
}
/** Number of matrix A rows
@@ -1852,45 +1882,14 @@ public:
};
private:
- const int _m;
- const int _n;
- const int _k;
- const int _mult_transpose1xW_width;
- const int _mult_interleave4x4_height;
- const int _depth_output_gemm3d;
- const bool _reinterpret_input_as_3d;
- const bool _broadcast_bias;
-};
-
-struct DepthwiseConvolutionReshapeInfo
-{
- unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */
- bool transpose{ false }; /**< True if the block MxC0 (where M is the area of the filter i.e. KwxKh) has to be transposed */
-};
-
-/** GEMMLowp output stage type */
-enum class GEMMLowpOutputStageType
-{
- NONE, /**< No quantization */
- QUANTIZE_DOWN, /**< Quantize using an integer multiplication */
- QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
- QUANTIZE_DOWN_FLOAT /**< Quantize using a floating point multiplication */
-};
-
-/** GEMMLowp output stage info */
-struct GEMMLowpOutputStageInfo
-{
- GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
- int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
- int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
- int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
- int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
- std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
- bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+ int _m;
+ int _n;
+ int _k;
+ int _mult_transpose1xW_width;
+ int _mult_interleave4x4_height;
+ int _depth_output_gemm3d;
+ bool _reinterpret_input_as_3d;
+ bool _broadcast_bias;
};
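// Editor's illustrative sketch, not part of the patch: a GEMM whose output is
// reinterpreted as 3-D with depth 24 (e.g. a 1x1 NHWC convolution); the two
// reshape multipliers are left at their neutral value of 1.
arm_compute::GEMMReshapeInfo reshape_info(24 * 24 /* m */, 64 /* n */, 128 /* k */, 1, 1, 24 /* depth_output_gemm3d */);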
/** GEMM LHS (Left Hand Side) matrix information */
@@ -1901,211 +1900,31 @@ struct GEMMLHSMatrixInfo
: m0(m), k0(k), v0(v), transpose(trans), interleave(inter)
{
}
- unsigned int m0{ 1 }; /**< Number of rows processed by the matrix multiplication */
- unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
- unsigned int v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
- bool transpose{ true }; /**< True if the (m0xk0) block has to be transposed before been stored */
- bool interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
+ unsigned int m0{1}; /**< Number of rows processed by the matrix multiplication */
+ unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */
+ unsigned int v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+    bool         transpose{true};  /**< True if the (m0xk0) block has to be transposed before being stored */
+ bool interleave{true}; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
};
/** GEMM RHS (Right Hand Side) matrix information */
struct GEMMRHSMatrixInfo
{
GEMMRHSMatrixInfo() = default;
- GEMMRHSMatrixInfo(unsigned int n, unsigned int k, unsigned int h, bool trans, bool inter)
- : n0(n), k0(k), h0(h), transpose(trans), interleave(inter)
+ GEMMRHSMatrixInfo(unsigned int n, unsigned int k, unsigned int h, bool trans, bool inter, bool export_to_cl_img)
+ : n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img)
{
}
- unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */
- unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
- unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */
- bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
- bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
+ unsigned int n0{1}; /**< Number of columns processed by the matrix multiplication */
+ unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */
+ unsigned int h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+    bool         transpose{true};  /**< True if the (k0xn0) block has to be transposed before being stored */
+ bool interleave{true}; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
+ bool export_to_cl_image{
+ false}; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
};
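// Editor's illustrative sketch, not part of the patch: the block sizes are
// placeholders. The RHS uses the extended constructor introduced above and
// enables export_to_cl_image, which requires n0 == 4.
arm_compute::GEMMLHSMatrixInfo lhs_info(4U /* m0 */, 4U /* k0 */, 2U /* v0 */, false /* trans */, true /* inter */);
arm_compute::GEMMRHSMatrixInfo rhs_info(4U /* n0 */, 4U /* k0 */, 2U /* h0 */, true /* trans */, false /* inter */, true /* export_to_cl_img */);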
-/** GEMM information class. This class stores the necessary information to compute GEMM functions
- *
- * This object also contains the information about how matrix A and matrix B have been reshaped
- *
- */
-class GEMMInfo
-{
-public:
- /** Default constructor */
- GEMMInfo() noexcept
- : _is_a_reshaped(false),
- _is_b_reshaped(false),
- _reshape_b_only_on_first_run(true),
- _depth_output_gemm3d(0),
- _reinterpret_input_as_3d(false),
- _retain_internal_weights(false),
- _gemmlowp_output_stage(),
- _fp_mixed_precision(false),
- _broadcast_bias(false),
- _pretranpose_B(true),
- _activation_info()
- {
- }
- /** Constructor
- *
- * @param[in] is_a_reshaped True if the matrix A has been reshaped
- * @param[in] is_b_reshaped True if the matrix B has been reshaped
- * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run
- * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
- * If 0 the output will not be reinterpreted as 3D. Default 0
- * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
- * to perform 1x1 convolutions with the NHWC data layout)
- * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run
- * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
- * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- */
- GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
- GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false,
- const ActivationLayerInfo &activation_info = ActivationLayerInfo()) noexcept
- : _is_a_reshaped(is_a_reshaped),
- _is_b_reshaped(is_b_reshaped),
- _reshape_b_only_on_first_run(reshape_b_only_on_first_run),
- _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d),
- _retain_internal_weights(retain_internal_weights),
- _gemmlowp_output_stage(gemmlowp_output_stage),
- _fp_mixed_precision(fp_mixed_precision),
- _broadcast_bias(broadcast_bias),
- _pretranpose_B(reshape_b_only_on_first_run),
- _activation_info(activation_info)
- {
- }
- /** Flag which specifies if the matrix A has been reshaped
- *
- * @return True if the matrix A has been reshaped
- */
- bool is_a_reshaped() const
- {
- return _is_a_reshaped;
- };
- /** Flag which specifies if the matrix B has been reshaped
- *
- * @return True if the matrix B has been reshaped
- */
- bool is_b_reshaped() const
- {
- return _is_b_reshaped;
- };
- /** Flag which specifies if the reshape of matrix B should executed only for the first
- *
- * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer
- *
- * @return True if the reshaped of matrix B happens only for the first run
- */
- bool reshape_b_only_on_first_run() const
- {
- return _reshape_b_only_on_first_run;
- };
- /** Depth of the output when GEMM output is reinterpreted as 3D tensor
- *
- * @return the depth of the output tensor
- */
- int depth_output_gemm3d() const
- {
- return _depth_output_gemm3d;
- };
- /** Flag which specifies if the input tensor has to be reinterpreted as 3D
- *
- * @return True if the input tensor has to be reinterpreted as 3D tensor
- */
- bool reinterpret_input_as_3d() const
- {
- return _reinterpret_input_as_3d;
- };
- /** Flag which specifies if the weights tensor has to be retained from previous run
- *
- * @return True if the weights tensor has to be retained
- */
- bool retain_internal_weights() const
- {
- return _retain_internal_weights;
- };
- /** GEMMLowp output stage
- *
- * @return the GEMMLowp output stage info
- */
- GEMMLowpOutputStageInfo gemmlowp_output_stage() const
- {
- return _gemmlowp_output_stage;
- };
- /** Sets GEMMLowp output stage
- *
- * @param[in] output_stage Output stage to set
- */
- void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
- {
- _gemmlowp_output_stage = output_stage;
- };
- /** Flag which specifies if a wider accumulator should be used.
- *
- * @return True if a wider accumulator has to be used
- */
- bool fp_mixed_precision() const
- {
- return _fp_mixed_precision;
- };
- /** Flag which specifies whether to broadcast the shape of the bias tensor.
- *
- * @return True if the shape of the bias tensor is to be broadcasted.
- */
- bool broadcast_bias() const
- {
- return _broadcast_bias;
- };
- /** Flag which specifies whether b should be pre-transposed if supported.
- *
- * @return True if b should be pre-transposed else false.
- */
- bool pretranpose_B() const
- {
- return _pretranpose_B;
- };
- /** Set pre-transpose b flag
- *
- * @param[in] flag Flag to set
- */
- void set_pretranpose_B(bool flag)
- {
- _pretranpose_B = flag;
- }
- /** Activation layer to apply after the matrix multiplication
- *
- * @return ActivationLayerInfo object
- */
- ActivationLayerInfo activation_info() const
- {
- return _activation_info;
- }
- /** Set activation layer info
- *
- * @param[in] activation_info ActivationLayerInfo object to set
- */
- void set_activation_info(const ActivationLayerInfo &activation_info)
- {
- _activation_info = activation_info;
- }
-
-private:
- bool _is_a_reshaped;
- bool _is_b_reshaped;
- bool _reshape_b_only_on_first_run;
- int _depth_output_gemm3d;
- bool _reinterpret_input_as_3d;
- bool _retain_internal_weights;
- GEMMLowpOutputStageInfo _gemmlowp_output_stage;
- bool _fp_mixed_precision;
- bool _broadcast_bias;
- bool _pretranpose_B;
- ActivationLayerInfo _activation_info;
-};
+class ITensorInfo;
/** Winograd information */
struct WinogradInfo
@@ -2118,16 +1937,23 @@ struct WinogradInfo
* @param[in] conv_info Convolution info (Pads, strides)
* @param[in] data_layout Data layout to use for the output tensor once the convolution has been applied
*/
- WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
- : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
- {
- }
-
- Size2D output_tile_size{}; /**< Width and height of the output tile */
- Size2D kernel_size{}; /**< Width and height of the kernel*/
- Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */
- PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */
- DataLayout output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
+ WinogradInfo(
+ Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
+ : output_tile_size(output_tile_sz),
+ kernel_size(kernel_sz),
+ input_dimensions(input_dims),
+ convolution_info(conv_info),
+ output_data_layout(data_layout)
+ {
+ }
+
+ Size2D output_tile_size{}; /**< Width and height of the output tile */
+ Size2D kernel_size{}; /**< Width and height of the kernel*/
+ Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */
+ PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */
+ DataLayout output_data_layout{
+ DataLayout::
+ NCHW}; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
};
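// Editor's illustrative sketch, not part of the patch: Winograd F(4x4, 3x3)
// over a 224x224 input with stride 1 and 1-pixel padding, NHWC output.
arm_compute::WinogradInfo winograd_info(arm_compute::Size2D(4U, 4U), arm_compute::Size2D(3U, 3U),
                                        arm_compute::Size2D(224U, 224U),
                                        arm_compute::PadStrideInfo(1, 1, 1, 1),
                                        arm_compute::DataLayout::NHWC);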
/** IO formatting information class*/
@@ -2186,5 +2012,8 @@ struct IOFormatInfo
/** Align columns */
bool align_columns;
};
+
+/** Type alias for holding information related to cropping */
+using CropInfo = Padding2D;
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TYPES_H */
+#endif // ACL_ARM_COMPUTE_CORE_TYPES_H
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index eff6157b1f..a2146522f7 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,63 +26,29 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Version.h"
-#include <algorithm>
-#include <cstdint>
-#include <cstdlib>
-#include <iomanip>
+#include <cmath>
#include <numeric>
#include <sstream>
#include <string>
#include <type_traits>
+#include <unordered_map>
#include <utility>
-#include <vector>
-namespace arm_compute
-{
-/** Calculate the rounded up quotient of val / m.
- *
- * @param[in] val Value to divide and round up.
- * @param[in] m Value to divide by.
- *
- * @return the result.
- */
-template <typename S, typename T>
-constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
-{
- return (val + m - 1) / m;
-}
-
-/** Computes the smallest number larger or equal to value that is a multiple of divisor.
- *
- * @param[in] value Lower bound value
- * @param[in] divisor Value to compute multiple of.
- *
- * @return the result.
- */
-template <typename S, typename T>
-inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
-{
- ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
- return DIV_CEIL(value, divisor) * divisor;
-}
+/* Convenience / backwards compatibility includes */
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
+#include "arm_compute/core/utils/DataLayoutUtils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/core/utils/FormatUtils.h"
+#include "arm_compute/core/utils/InterpolationPolicyUtils.h"
+#include "arm_compute/core/utils/StringUtils.h"
-/** Computes the largest number smaller or equal to value that is a multiple of divisor.
- *
- * @param[in] value Upper bound value
- * @param[in] divisor Value to compute multiple of.
- *
- * @return the result.
- */
-template <typename S, typename T>
-inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
+namespace arm_compute
{
- ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
- return (value / divisor) * divisor;
-}
+class ITensor;
+class ITensorInfo;
+class ActivationLayerInfo;
/** Load an entire file in memory
*
@@ -93,814 +59,6 @@ inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor)
*/
std::string read_file(const std::string &filename, bool binary);
-/** The size in bytes of the data type
- *
- * @param[in] data_type Input data type
- *
- * @return The size in bytes of the data type
- */
-inline size_t data_size_from_type(DataType data_type)
-{
- switch(data_type)
- {
- case DataType::U8:
- case DataType::S8:
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- return 1;
- case DataType::U16:
- case DataType::S16:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- return 2;
- case DataType::F32:
- case DataType::U32:
- case DataType::S32:
- return 4;
- case DataType::F64:
- case DataType::U64:
- case DataType::S64:
- return 8;
- case DataType::SIZET:
- return sizeof(size_t);
- default:
- ARM_COMPUTE_ERROR("Invalid data type");
- return 0;
- }
-}
-
-/** The size in bytes of the pixel format
- *
- * @param[in] format Input format
- *
- * @return The size in bytes of the pixel format
- */
-inline size_t pixel_size_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- return 1;
- case Format::U16:
- case Format::S16:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::UV88:
- case Format::YUYV422:
- case Format::UYVY422:
- return 2;
- case Format::RGB888:
- return 3;
- case Format::RGBA8888:
- return 4;
- case Format::U32:
- case Format::S32:
- case Format::F32:
- return 4;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- ARM_COMPUTE_ERROR("Undefined pixel size for given format");
- return 0;
- }
-}
-
-/** The size in bytes of the data type
- *
- * @param[in] dt Input data type
- *
- * @return The size in bytes of the data type
- */
-inline size_t element_size_from_data_type(DataType dt)
-{
- switch(dt)
- {
- case DataType::S8:
- case DataType::U8:
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- return 1;
- case DataType::U16:
- case DataType::S16:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- return 2;
- case DataType::U32:
- case DataType::S32:
- case DataType::F32:
- return 4;
- default:
- ARM_COMPUTE_ERROR("Undefined element size for given data type");
- return 0;
- }
-}
-
-/** Return the data type used by a given single-planar pixel format
- *
- * @param[in] format Input format
- *
- * @return The size in bytes of the pixel format
- */
-inline DataType data_type_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::UV88:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return DataType::U8;
- case Format::U16:
- return DataType::U16;
- case Format::S16:
- return DataType::S16;
- case Format::U32:
- return DataType::U32;
- case Format::S32:
- return DataType::S32;
- case Format::BFLOAT16:
- return DataType::BFLOAT16;
- case Format::F16:
- return DataType::F16;
- case Format::F32:
- return DataType::F32;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- ARM_COMPUTE_ERROR("Not supported data_type for given format");
- return DataType::UNKNOWN;
- }
-}
-
-/** Return the plane index of a given channel given an input format.
- *
- * @param[in] format Input format
- * @param[in] channel Input channel
- *
- * @return The plane index of the specific channel of the specific format
- */
-inline int plane_idx_from_channel(Format format, Channel channel)
-{
- switch(format)
- {
- // Single planar formats have a single plane
- case Format::U8:
- case Format::U16:
- case Format::S16:
- case Format::U32:
- case Format::S32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- case Format::UV88:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return 0;
- // Multi planar formats
- case Format::NV12:
- case Format::NV21:
- {
- // Channel U and V share the same plane of format UV88
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- case Channel::V:
- return 1;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::IYUV:
- case Format::YUV444:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the channel index of a given channel given an input format.
- *
- * @param[in] format Input format
- * @param[in] channel Input channel
- *
- * @return The channel index of the specific channel of the specific format
- */
-inline int channel_idx_from_format(Format format, Channel channel)
-{
- switch(format)
- {
- case Format::RGB888:
- {
- switch(channel)
- {
- case Channel::R:
- return 0;
- case Channel::G:
- return 1;
- case Channel::B:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::RGBA8888:
- {
- switch(channel)
- {
- case Channel::R:
- return 0;
- case Channel::G:
- return 1;
- case Channel::B:
- return 2;
- case Channel::A:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::YUYV422:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::UYVY422:
- {
- switch(channel)
- {
- case Channel::Y:
- return 1;
- case Channel::U:
- return 0;
- case Channel::V:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::NV12:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 0;
- case Channel::V:
- return 1;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::NV21:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 0;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::YUV444:
- case Format::IYUV:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 0;
- case Channel::V:
- return 0;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the number of planes for a given format
- *
- * @param[in] format Input format
- *
- * @return The number of planes for a given image format.
- */
-inline size_t num_planes_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::S16:
- case Format::U16:
- case Format::S32:
- case Format::U32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return 1;
- case Format::NV12:
- case Format::NV21:
- return 2;
- case Format::IYUV:
- case Format::YUV444:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the number of channels for a given single-planar pixel format
- *
- * @param[in] format Input format
- *
- * @return The number of channels for a given image format.
- */
-inline size_t num_channels_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::U16:
- case Format::S16:
- case Format::U32:
- case Format::S32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- return 1;
- // Because the U and V channels are subsampled
- // these formats appear like having only 2 channels:
- case Format::YUYV422:
- case Format::UYVY422:
- return 2;
- case Format::UV88:
- return 2;
- case Format::RGB888:
- return 3;
- case Format::RGBA8888:
- return 4;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- return 0;
- }
-}
-
-/** Return the promoted data type of a given data type.
- *
- * @note If promoted data type is not supported an error will be thrown
- *
- * @param[in] dt Data type to get the promoted type of.
- *
- * @return Promoted data type
- */
-inline DataType get_promoted_data_type(DataType dt)
-{
- switch(dt)
- {
- case DataType::U8:
- return DataType::U16;
- case DataType::S8:
- return DataType::S16;
- case DataType::U16:
- return DataType::U32;
- case DataType::S16:
- return DataType::S32;
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- case DataType::U32:
- case DataType::S32:
- case DataType::F32:
- ARM_COMPUTE_ERROR("Unsupported data type promotions!");
- default:
- ARM_COMPUTE_ERROR("Undefined data type!");
- }
- return DataType::UNKNOWN;
-}
-
-/** Compute the mininum and maximum values a data type can take
- *
- * @param[in] dt Data type to get the min/max bounds of
- *
- * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue.
- */
-inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt)
-{
- PixelValue min{};
- PixelValue max{};
- switch(dt)
- {
- case DataType::U8:
- case DataType::QASYMM8:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()));
- break;
- }
- case DataType::S8:
- case DataType::QSYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max()));
- break;
- }
- case DataType::U16:
- case DataType::QASYMM16:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max()));
- break;
- }
- case DataType::S16:
- case DataType::QSYMM16:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
- break;
- }
- case DataType::U32:
- {
- min = PixelValue(std::numeric_limits<uint32_t>::lowest());
- max = PixelValue(std::numeric_limits<uint32_t>::max());
- break;
- }
- case DataType::S32:
- {
- min = PixelValue(std::numeric_limits<int32_t>::lowest());
- max = PixelValue(std::numeric_limits<int32_t>::max());
- break;
- }
- case DataType::BFLOAT16:
- {
- min = PixelValue(bfloat16::lowest());
- max = PixelValue(bfloat16::max());
- break;
- }
- case DataType::F16:
- {
- min = PixelValue(std::numeric_limits<half>::lowest());
- max = PixelValue(std::numeric_limits<half>::max());
- break;
- }
- case DataType::F32:
- {
- min = PixelValue(std::numeric_limits<float>::lowest());
- max = PixelValue(std::numeric_limits<float>::max());
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Undefined data type!");
- }
- return std::make_tuple(min, max);
-}
-
-/** Return true if the given format has horizontal subsampling.
- *
- * @param[in] format Format to determine subsampling.
- *
- * @return True if the format can be subsampled horizontaly.
- */
-inline bool has_format_horizontal_subsampling(Format format)
-{
- return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
-}
-
-/** Return true if the given format has vertical subsampling.
- *
- * @param[in] format Format to determine subsampling.
- *
- * @return True if the format can be subsampled verticaly.
- */
-inline bool has_format_vertical_subsampling(Format format)
-{
- return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
-}
-
-/** Separate a 2D convolution into two 1D convolutions
- *
- * @param[in] conv 2D convolution
- * @param[out] conv_col 1D vertical convolution
- * @param[out] conv_row 1D horizontal convolution
- * @param[in] size Size of the 2D convolution
- *
- * @return true if the separation was successful
- */
-inline bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size)
-{
- int32_t min_col = -1;
- int16_t min_col_val = -1;
-
- for(int32_t i = 0; i < size; ++i)
- {
- if(conv[i] != 0 && (min_col < 0 || abs(min_col_val) > abs(conv[i])))
- {
- min_col = i;
- min_col_val = conv[i];
- }
- }
-
- if(min_col < 0)
- {
- return false;
- }
-
- for(uint32_t j = 0; j < size; ++j)
- {
- conv_col[j] = conv[min_col + j * size];
- }
-
- for(uint32_t i = 0; i < size; i++)
- {
- if(static_cast<int>(i) == min_col)
- {
- conv_row[i] = 1;
- }
- else
- {
- int16_t coeff = conv[i] / conv[min_col];
-
- for(uint32_t j = 1; j < size; ++j)
- {
- if(conv[i + j * size] != (conv_col[j] * coeff))
- {
- return false;
- }
- }
-
- conv_row[i] = coeff;
- }
- }
-
- return true;
-}
-
-/** Calculate the scale of the given square matrix
- *
- * The scale is the absolute value of the sum of all the coefficients in the matrix.
- *
- * @note If the coefficients add up to 0 then the scale is set to 1.
- *
- * @param[in] matrix Matrix coefficients
- * @param[in] matrix_size Number of elements per side of the square matrix. (Number of coefficients = matrix_size * matrix_size).
- *
- * @return The absolute value of the sum of the coefficients if they don't add up to 0, otherwise 1.
- */
-inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size)
-{
- const size_t size = matrix_size * matrix_size;
-
- return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0)));
-}
-
-/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats.
- *
- * @note Adding here a few links discussing the issue of odd size and sharing the same solution:
- * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a>
- * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a>
- * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&amp;can=1&amp;q=odd%20width">libYUV</a>
- * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a> *
- *
- * @param[in, out] shape Tensor shape of 2D size
- * @param[in] format Format of the tensor
- *
- * @return The adjusted tensor shape.
- */
-inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
-{
- TensorShape output{ shape };
-
- // Force width to be even for formats which require subsampling of the U and V channels
- if(has_format_horizontal_subsampling(format))
- {
- output.set(0, output.x() & ~1U);
- }
-
- // Force height to be even for formats which require subsampling of the U and V channels
- if(has_format_vertical_subsampling(format))
- {
- output.set(1, output.y() & ~1U);
- }
-
- return output;
-}
-
-/** Calculate subsampled shape for a given format and channel
- *
- * @param[in] shape Shape of the tensor to calculate the extracted channel.
- * @param[in] format Format of the tensor.
- * @param[in] channel Channel to create tensor shape to be extracted.
- *
- * @return The subsampled tensor shape.
- */
-inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
-{
- TensorShape output{ shape };
-
- // Subsample shape only for U or V channel
- if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
- {
- // Subsample width for the tensor shape when channel is U or V
- if(has_format_horizontal_subsampling(format))
- {
- output.set(0, output.x() / 2U);
- }
-
- // Subsample height for the tensor shape when channel is U or V
- if(has_format_vertical_subsampling(format))
- {
- output.set(1, output.y() / 2U);
- }
- }
-
- return output;
-}
-
-/** Calculate accurary required by the horizontal and vertical convolution computations
- *
- * @param[in] conv_col Pointer to the vertical vector of the separated convolution filter
- * @param[in] conv_row Pointer to the horizontal vector of the convolution filter
- * @param[in] size Number of elements per vector of the separated matrix
- *
- * @return The return type is a pair. The first element of the pair is the biggest data type needed for the first stage. The second
- * element of the pair is the biggest data type needed for the second stage.
- */
-inline std::pair<DataType, DataType> data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size)
-{
- DataType first_stage = DataType::UNKNOWN;
- DataType second_stage = DataType::UNKNOWN;
-
- auto gez = [](const int16_t &v)
- {
- return v >= 0;
- };
-
- auto accu_neg = [](const int &first, const int &second)
- {
- return first + (second < 0 ? second : 0);
- };
-
- auto accu_pos = [](const int &first, const int &second)
- {
- return first + (second > 0 ? second : 0);
- };
-
- const bool only_positive_coefficients = std::all_of(conv_row, conv_row + size, gez) && std::all_of(conv_col, conv_col + size, gez);
-
- if(only_positive_coefficients)
- {
- const int max_row_value = std::accumulate(conv_row, conv_row + size, 0) * UINT8_MAX;
- const int max_value = std::accumulate(conv_col, conv_col + size, 0) * max_row_value;
-
- first_stage = (max_row_value <= UINT16_MAX) ? DataType::U16 : DataType::S32;
-
- second_stage = (max_value <= UINT16_MAX) ? DataType::U16 : DataType::S32;
- }
- else
- {
- const int min_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_neg) * UINT8_MAX;
- const int max_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_pos) * UINT8_MAX;
- const int neg_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_neg);
- const int pos_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_pos);
- const int min_value = neg_coeffs_sum * max_row_value + pos_coeffs_sum * min_row_value;
- const int max_value = neg_coeffs_sum * min_row_value + pos_coeffs_sum * max_row_value;
-
- first_stage = ((INT16_MIN <= min_row_value) && (max_row_value <= INT16_MAX)) ? DataType::S16 : DataType::S32;
-
- second_stage = ((INT16_MIN <= min_value) && (max_value <= INT16_MAX)) ? DataType::S16 : DataType::S32;
- }
-
- return std::make_pair(first_stage, second_stage);
-}
-
-/** Calculate the accuracy required by the squared convolution calculation.
- *
- *
- * @param[in] conv Pointer to the squared convolution matrix
- * @param[in] size The total size of the convolution matrix
- *
- * @return The return is the biggest data type needed to do the convolution
- */
-inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t size)
-{
- auto gez = [](const int16_t v)
- {
- return v >= 0;
- };
-
- const bool only_positive_coefficients = std::all_of(conv, conv + size, gez);
-
- if(only_positive_coefficients)
- {
- const int max_conv_value = std::accumulate(conv, conv + size, 0) * UINT8_MAX;
- if(max_conv_value <= UINT16_MAX)
- {
- return DataType::U16;
- }
- else
- {
- return DataType::S32;
- }
- }
- else
- {
- const int min_value = std::accumulate(conv, conv + size, 0, [](int a, int b)
- {
- return b < 0 ? a + b : a;
- })
- * UINT8_MAX;
-
- const int max_value = std::accumulate(conv, conv + size, 0, [](int a, int b)
- {
- return b > 0 ? a + b : a;
- })
- * UINT8_MAX;
-
- if((INT16_MIN <= min_value) && (INT16_MAX >= max_value))
- {
- return DataType::S16;
- }
- else
- {
- return DataType::S32;
- }
- }
-}
-
/** Permutes the given dimensions according to the permutation vector
*
* @param[in,out] dimensions Dimensions to be permuted.
@@ -911,7 +69,7 @@ template <typename T>
inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector &perm)
{
const auto old_dim = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end());
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
T dimension_val = old_dim[i];
dimensions.set(perm[i], dimension_val);
@@ -929,7 +87,11 @@ inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector &
*
* @return PadStrideInfo for SAME padding
*/
-PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u),
+PadStrideInfo calculate_same_pad(TensorShape input_shape,
+ TensorShape weights_shape,
+ PadStrideInfo conv_info,
+ DataLayout data_layout = DataLayout::NCHW,
+ const Size2D &dilation = Size2D(1u, 1u),
const DimensionRoundingType &rounding_type = DimensionRoundingType::FLOOR);
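// Editor's worked example, not part of the patch, using the usual SAME-padding
// arithmetic: for width 224, kernel 3, stride 2 the target output width is
// ceil(224 / 2) = 112, so the total pad is max((112 - 1) * 2 + 3 - 224, 0) = 1,
// split between left and right (the odd pixel lands on one side).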
/** Returns expected width and height of the deconvolution's output tensor.
@@ -942,8 +104,10 @@ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_sh
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
- unsigned int kernel_width, unsigned int kernel_height,
+std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width,
+ unsigned int in_height,
+ unsigned int kernel_width,
+ unsigned int kernel_height,
const PadStrideInfo &pad_stride_info);
/** Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
@@ -957,11 +121,47 @@ std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned i
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-std::pair<unsigned int, unsigned int> scaled_dimensions(int width, int height,
- int kernel_width, int kernel_height,
+std::pair<unsigned int, unsigned int> scaled_dimensions(int width,
+ int height,
+ int kernel_width,
+ int kernel_height,
const PadStrideInfo &pad_stride_info,
const Size2D &dilation = Size2D(1U, 1U));
+/** Returns calculated width and height of output scaled tensor depending on dimensions rounding mode.
+ *
+ * @param[in] width Width of input tensor (Number of columns)
+ * @param[in] height Height of input tensor (Number of rows)
+ * @param[in] kernel_width Kernel width.
+ * @param[in] kernel_height Kernel height.
+ * @param[in] pad_stride_info Pad and stride information.
+ *
+ * @return A pair with the new width in the first position and the new height in the second; the returned values can be < 1
+ */
+std::pair<int, int> scaled_dimensions_signed(
+ int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info);
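// Editor's worked example, not part of the patch, assuming the conventional
// FLOOR formula out = (in + pad_left + pad_right - kernel) / stride + 1:
// width 10, kernel 3, stride 2 and no padding gives (10 - 3) / 2 + 1 = 4.
// Unlike scaled_dimensions(), this signed variant does not clamp, so some
// configurations can legitimately return values < 1.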
+
+/** Returns calculated width, height and depth of output scaled tensor depending on dimensions rounding mode.
+ *
+ * @param[in] width Width of input tensor
+ * @param[in] height Height of input tensor
+ * @param[in] depth Depth of input tensor
+ * @param[in] kernel_width Kernel width.
+ * @param[in] kernel_height Kernel height.
+ * @param[in] kernel_depth Kernel depth.
+ * @param[in] pool3d_info   Padding, stride and rounding information for 3D pooling
+ *
+ * @return A tuple with the new width in the first position, the new height in the second, and the new depth in the third.
+ * Returned values can be < 1
+ */
+std::tuple<int, int, int> scaled_3d_dimensions_signed(int width,
+ int height,
+ int depth,
+ int kernel_width,
+ int kernel_height,
+ int kernel_depth,
+ const Pooling3dLayerInfo &pool3d_info);
+
/** Check if the given reduction operation should be handled in a serial way.
*
* @param[in] op Reduction operation to perform
@@ -981,16 +181,6 @@ bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int
*/
QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool is_log);
-/** Returns resize ratio between input and output with consideration of aligned corners
- *
- * @param[in] input_size The input size
- * @param[in] output_size the output size
- * @param[in] align_corners True to align corners of input and output. Defaults to false.
- *
- * @return The ratio between input and output (i.e., the input size divided by the output size)
- */
-float calculate_resize_ratio(size_t input_size, size_t output_size, bool align_corners = false);
-
/** Returns a pair of minimum and maximum values for a quantized activation
*
* @param[in] act_info The information for activation
@@ -999,15 +189,9 @@ float calculate_resize_ratio(size_t input_size, size_t output_size, bool align_c
*
* @return The pair with minimum and maximum values
*/
-std::pair<int32_t, int32_t> get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info);
-
-/** Convert a tensor format into a string.
- *
- * @param[in] format @ref Format to be translated to string.
- *
- * @return The string describing the format.
- */
-const std::string &string_from_format(Format format);
+std::pair<int32_t, int32_t> get_quantized_activation_min_max(const ActivationLayerInfo &act_info,
+ DataType data_type,
+ UniformQuantizationInfo oq_info);
/** Convert a channel identity into a string.
*
@@ -1016,48 +200,7 @@ const std::string &string_from_format(Format format);
* @return The string describing the channel.
*/
const std::string &string_from_channel(Channel channel);
-/** Convert a data layout identity into a string.
- *
- * @param[in] dl @ref DataLayout to be translated to string.
- *
- * @return The string describing the data layout.
- */
-const std::string &string_from_data_layout(DataLayout dl);
-/** Convert a data type identity into a string.
- *
- * @param[in] dt @ref DataType to be translated to string.
- *
- * @return The string describing the data type.
- */
-const std::string &string_from_data_type(DataType dt);
-/** Convert a matrix pattern into a string.
- *
- * @param[in] pattern @ref MatrixPattern to be translated to string.
- *
- * @return The string describing the matrix pattern.
- */
-const std::string &string_from_matrix_pattern(MatrixPattern pattern);
-/** Translates a given activation function to a string.
- *
- * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
- *
- * @return The string describing the activation function.
- */
-const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act);
-/** Translates a given non linear function to a string.
- *
- * @param[in] function @ref NonLinearFilterFunction to be translated to string.
- *
- * @return The string describing the non linear function.
- */
-const std::string &string_from_non_linear_filter_function(NonLinearFilterFunction function);
-/** Translates a given interpolation policy to a string.
- *
- * @param[in] policy @ref InterpolationPolicy to be translated to string.
- *
- * @return The string describing the interpolation policy.
- */
-const std::string &string_from_interpolation_policy(InterpolationPolicy policy);
+
/** Translates a given border mode policy to a string.
*
* @param[in] border_mode @ref BorderMode to be translated to string.
@@ -1079,162 +222,67 @@ const std::string &string_from_norm_type(NormType type);
* @return The string describing the pooling type.
*/
const std::string &string_from_pooling_type(PoolingType type);
-/** Translates a given GEMMLowp output stage to a string.
- *
- * @param[in] output_stage @ref GEMMLowpOutputStageInfo to be translated to string.
- *
- * @return The string describing the GEMMLowp output stage
- */
-const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType output_stage);
-/** Convert a PixelValue to a string, represented through the specific data type
- *
- * @param[in] value The PixelValue to convert
- * @param[in] data_type The type to be used to convert the @p value
+/** Check if the pool region is entirely outside the input tensor
*
- * @return String representation of the PixelValue through the given data type.
- */
-std::string string_from_pixel_value(const PixelValue &value, const DataType data_type);
-/** Lower a given string.
+ * @param[in] info @ref PoolingLayerInfo to be checked.
*
- * @param[in] val Given string to lower.
- *
- * @return The lowered string
+ * @return True if the pool region is entirely outside the input tensor, False otherwise.
*/
-std::string lower_string(const std::string &val);
-
-/** Check if a given data type is of floating point type
+bool is_pool_region_entirely_outside_input(const PoolingLayerInfo &info);
+/** Check if the 3d pool region is entirely outside the input tensor
*
- * @param[in] dt Input data type.
+ * @param[in] info @ref Pooling3dLayerInfo to be checked.
*
- * @return True if data type is of floating point type, else false.
+ * @return True if the pool region is entirely outside the input tensor, False otherwise.
*/
-inline bool is_data_type_float(DataType dt)
-{
- switch(dt)
- {
- case DataType::F16:
- case DataType::F32:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of quantized type
- *
- * @note Quantized is considered a super-set of fixed-point and asymmetric data types.
+bool is_pool_3d_region_entirely_outside_input(const Pooling3dLayerInfo &info);
+/** Check if the 3D padding is symmetric, i.e. the paddings on each pair of opposite sides are equal (left=right, top=bottom and front=back)
*
- * @param[in] dt Input data type.
+ * @param[in] info @ref Padding3D object to check for symmetry
*
- * @return True if data type is of quantized type, else false.
+ * @return True if the padding is symmetric
*/
-inline bool is_data_type_quantized(DataType dt)
+inline bool is_symmetric(const Padding3D &info)
{
- switch(dt)
- {
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- return true;
- default:
- return false;
- }
+ return ((info.left == info.right) && (info.top == info.bottom) && (info.front == info.back));
}
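+// Illustrative check (editor's sketch; the Padding3D constructor arguments are
+// assumed to be left, right, top, bottom, front, back):
+//
+//     is_symmetric(Padding3D(1, 1, 2, 2, 0, 0)); // true: 1==1, 2==2, 0==0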
-
-/** Check if a given data type is of asymmetric quantized type
+/** Translates a given GEMMLowp output stage to a string.
*
- * @param[in] dt Input data type.
+ * @param[in] output_stage @ref GEMMLowpOutputStageInfo to be translated to string.
*
- * @return True if data type is of asymmetric quantized type, else false.
+ * @return The string describing the GEMMLowp output stage
*/
-inline bool is_data_type_quantized_asymmetric(DataType dt)
-{
- switch(dt)
- {
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QASYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of asymmetric quantized signed type
+const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType output_stage);
+/** Convert a PixelValue to a string, represented through the specific data type
*
- * @param[in] dt Input data type.
+ * @param[in] value The PixelValue to convert
+ * @param[in] data_type The type to be used to convert the @p value
*
- * @return True if data type is of asymmetric quantized signed type, else false.
+ * @return String representation of the PixelValue through the given data type.
*/
-inline bool is_data_type_quantized_asymmetric_signed(DataType dt)
-{
- switch(dt)
- {
- case DataType::QASYMM8_SIGNED:
- return true;
- default:
- return false;
- }
-}
+std::string string_from_pixel_value(const PixelValue &value, const DataType data_type);
-/** Check if a given data type is of symmetric quantized type
+/** Stores padding information before configuring a kernel
*
- * @param[in] dt Input data type.
+ * @param[in] infos List of tensor infos to store the padding info for
*
- * @return True if data type is of symmetric quantized type, else false.
+ * @return An unordered map where each tensor info pointer is paired with its original padding info
*/
-inline bool is_data_type_quantized_symmetric(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of per channel type
+std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensorInfo *> infos);
+/** Stores padding information before configuring a kernel
*
- * @param[in] dt Input data type.
+ * @param[in] tensors List of tensors to store the padding info for
*
- * @return True if data type is of per channel type, else false.
+ * @return An unordered map where each tensor info pointer is paired with its original padding info
*/
-inline bool is_data_type_quantized_per_channel(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8_PER_CHANNEL:
- return true;
- default:
- return false;
- }
-}
-
-/** Create a string with the float in full precision.
+std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensor *> tensors);
+/** Check if the previously stored padding info has changed after configuring a kernel
*
- * @param val Floating point value
+ * @param[in] padding_map An unordered map where each tensor info pointer is paired with its original padding info
*
- * @return String with the floating point value.
+ * @return True if any of the tensor infos has changed its padding
*/
-inline std::string float_to_string_with_full_precision(float val)
-{
- std::stringstream ss;
- ss.precision(std::numeric_limits<float>::max_digits10);
- ss << val;
-
- if(val != static_cast<int>(val))
- {
- ss << "f";
- }
-
- return ss.str();
-}
+bool has_padding_changed(const std::unordered_map<const ITensorInfo *, PaddingSize> &padding_map);
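+
+// Typical guard pattern (editor's sketch, not part of this change; `src` and
+// `dst` are hypothetical ITensorInfo pointers):
+//
+//     auto padding_map = get_padding_info({src, dst}); // snapshot before configure
+//     // ... compute the execution window and configure the kernel ...
+//     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_map)); // padding must stay untouched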
/** Returns the number of elements required to go from start to end with the wanted step
*
@@ -1250,67 +298,6 @@ inline size_t num_of_elements_in_range(const float start, const float end, const
return size_t(std::ceil((end - start) / step));
}
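+// For example, num_of_elements_in_range(0.0f, 1.0f, 0.25f) returns
+// ceil((1.0f - 0.0f) / 0.25f) = 4, covering the values {0.0, 0.25, 0.5, 0.75}.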
-/** Returns true if the value can be represented by the given data type
- *
- * @param[in] val value to be checked
- * @param[in] dt data type that is checked
- * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8
- *
- * @return true if the data type can hold the value.
- */
-template <typename T>
-bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo())
-{
- switch(dt)
- {
- case DataType::U8:
- {
- const auto val_u8 = static_cast<uint8_t>(val);
- return ((val_u8 == val) && val_u8 >= std::numeric_limits<uint8_t>::lowest() && val_u8 <= std::numeric_limits<uint8_t>::max());
- }
- case DataType::QASYMM8:
- {
- double min = static_cast<double>(dequantize_qasymm8(0, qinfo));
- double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo));
- return ((double)val >= min && (double)val <= max);
- }
- case DataType::S8:
- {
- const auto val_s8 = static_cast<int8_t>(val);
- return ((val_s8 == val) && val_s8 >= std::numeric_limits<int8_t>::lowest() && val_s8 <= std::numeric_limits<int8_t>::max());
- }
- case DataType::U16:
- {
- const auto val_u16 = static_cast<uint16_t>(val);
- return ((val_u16 == val) && val_u16 >= std::numeric_limits<uint16_t>::lowest() && val_u16 <= std::numeric_limits<uint16_t>::max());
- }
- case DataType::S16:
- {
- const auto val_s16 = static_cast<int16_t>(val);
- return ((val_s16 == val) && val_s16 >= std::numeric_limits<int16_t>::lowest() && val_s16 <= std::numeric_limits<int16_t>::max());
- }
- case DataType::U32:
- {
- const auto val_u32 = static_cast<uint32_t>(val);
- return ((val_u32 == val) && val_u32 >= std::numeric_limits<uint32_t>::lowest() && val_u32 <= std::numeric_limits<uint32_t>::max());
- }
- case DataType::S32:
- {
- const auto val_s32 = static_cast<int32_t>(val);
- return ((val_s32 == val) && val_s32 >= std::numeric_limits<int32_t>::lowest() && val_s32 <= std::numeric_limits<int32_t>::max());
- }
- case DataType::BFLOAT16:
- return (val >= bfloat16::lowest() && val <= bfloat16::max());
- case DataType::F16:
- return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max());
- case DataType::F32:
- return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max());
- default:
- ARM_COMPUTE_ERROR("Data type not supported");
- return false;
- }
-}
-
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Print consecutive elements to an output stream.
*
@@ -1321,26 +308,27 @@ bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = Quantization
 * @param[in] element_delim (Optional) Delimiter among the consecutive elements. Defaults to a space delimiter
*/
template <typename T>
-void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ")
+void print_consecutive_elements_impl(
+ std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ")
{
using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type;
std::ios stream_status(nullptr);
stream_status.copyfmt(s);
- for(unsigned int i = 0; i < n; ++i)
+ for (unsigned int i = 0; i < n; ++i)
{
// Set stream width as it is not a "sticky" stream manipulator
- if(stream_width != 0)
+ if (stream_width != 0)
{
s.width(stream_width);
}
- if(std::is_same<typename std::decay<T>::type, half>::value)
+ if (std::is_same<typename std::decay<T>::type, half>::value)
{
             // We use T instead of print_type here because std::is_floating_point<half> returns false, which would make print_type int.
s << std::right << static_cast<T>(ptr[i]) << element_delim;
}
- else if(std::is_same<typename std::decay<T>::type, bfloat16>::value)
+ else if (std::is_same<typename std::decay<T>::type, bfloat16>::value)
{
             // We use T instead of print_type here because std::is_floating_point<bfloat16> returns false, which would make print_type int.
s << std::right << float(ptr[i]) << element_delim;
@@ -1369,17 +357,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u
using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type;
int max_width = -1;
- for(unsigned int i = 0; i < n; ++i)
+ for (unsigned int i = 0; i < n; ++i)
{
std::stringstream ss;
ss.copyfmt(s);
- if(std::is_same<typename std::decay<T>::type, half>::value)
+ if (std::is_same<typename std::decay<T>::type, half>::value)
{
             // We use T instead of print_type here because std::is_floating_point<half> returns false, which would make print_type int.
ss << static_cast<T>(ptr[i]);
}
- else if(std::is_same<typename std::decay<T>::type, bfloat16>::value)
+ else if (std::is_same<typename std::decay<T>::type, bfloat16>::value)
{
             // We use T instead of print_type here because std::is_floating_point<bfloat16> returns false, which would make print_type int.
ss << float(ptr[i]);
@@ -1403,7 +391,12 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u
* @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0.
 * @param[in] element_delim (Optional) Delimiter among the consecutive elements. Defaults to a space delimiter
*/
-void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " ");
+void print_consecutive_elements(std::ostream &s,
+ DataType dt,
+ const uint8_t *ptr,
+ unsigned int n,
+ int stream_width,
+ const std::string &element_delim = " ");
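+
+// Debug-printing sketch (editor's illustration; assumes ARM_COMPUTE_ASSERTS_ENABLED
+// and a hypothetical, already-mapped F32 tensor `t`):
+//
+//     print_consecutive_elements(std::cout, DataType::F32, t.buffer(), 8 /* n */, 0, ", ");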
/** Identify the maximum width of n consecutive elements.
*
@@ -1416,5 +409,5 @@ void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr
*/
int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n);
#endif /* ARM_COMPUTE_ASSERTS_ENABLED */
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_UTILS_H */
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index bbea5e5575..5550560aff 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,12 +25,12 @@
#define ARM_COMPUTE_VALIDATE_H
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/HOGInfo.h"
#include "arm_compute/core/IKernel.h"
-#include "arm_compute/core/IMultiHOG.h"
-#include "arm_compute/core/IMultiImage.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/MultiImageInfo.h"
+#include "arm_compute/core/QuantizationInfo.h"
+#include "arm_compute/core/utils/DataLayoutUtils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/core/utils/FormatUtils.h"
#include "arm_compute/core/Window.h"
#include <algorithm>
@@ -50,9 +50,9 @@ namespace detail
template <typename T>
inline bool have_different_dimensions(const Dimensions<T> &dim1, const Dimensions<T> &dim2, unsigned int upper_dim)
{
- for(unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i)
+ for (unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i)
{
- if(dim1[i] != dim2[i])
+ if (dim1[i] != dim2[i])
{
return true;
}
@@ -80,7 +80,7 @@ public:
* @param[in] line Source code line. Used for error reporting.
*/
compare_dimension(const Dimensions<T> &dim, const char *function, const char *file, int line)
- : _dim{ dim }, _function{ function }, _file{ file }, _line{ line }
+ : _dim{dim}, _function{function}, _file{file}, _line{line}
{
}
@@ -111,7 +111,7 @@ inline arm_compute::Status for_each_error(F &&)
}
template <typename F, typename T, typename... Ts>
-inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&... args)
+inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&...args)
{
ARM_COMPUTE_RETURN_ON_ERROR(func(arg));
ARM_COMPUTE_RETURN_ON_ERROR(for_each_error(func, args...));
@@ -148,13 +148,11 @@ struct get_tensor_info_t<ITensorInfo *>
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&... pointers)
+inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&...pointers)
{
- const std::array<const void *, sizeof...(Ts)> pointers_array{ { std::forward<Ts>(pointers)... } };
- bool has_nullptr = std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr)
- {
- return (ptr == nullptr);
- });
+ const std::array<const void *, sizeof...(Ts)> pointers_array{{std::forward<Ts>(pointers)...}};
+ bool has_nullptr =
+ std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) { return (ptr == nullptr); });
ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(has_nullptr, function, file, line, "Nullptr object!");
return arm_compute::Status{};
}
@@ -178,8 +176,8 @@ inline arm_compute::Status error_on_nullptr(const char *function, const char *fi
*
* @return Status
*/
-arm_compute::Status error_on_mismatching_windows(const char *function, const char *file, const int line,
- const Window &full, const Window &win);
+arm_compute::Status error_on_mismatching_windows(
+ const char *function, const char *file, const int line, const Window &full, const Window &win);
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
@@ -200,8 +198,8 @@ arm_compute::Status error_on_mismatching_windows(const char *function, const cha
*
* @return Status
*/
-arm_compute::Status error_on_invalid_subwindow(const char *function, const char *file, const int line,
- const Window &full, const Window &sub);
+arm_compute::Status error_on_invalid_subwindow(
+ const char *function, const char *file, const int line, const Window &full, const Window &sub);
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBWINDOW(f, s) \
@@ -220,12 +218,14 @@ arm_compute::Status error_on_invalid_subwindow(const char *function, const char
*
* @return Status
*/
-arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *function, const char *file, const int line,
- const Window &full, const Window &window, const int dim);
+arm_compute::Status error_on_window_not_collapsable_at_dimension(
+ const char *function, const char *file, const int line, const Window &full, const Window &window, const int dim);
#define ARM_COMPUTE_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
#define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
/** Return an error if the passed coordinates have too many dimensions.
*
@@ -239,8 +239,8 @@ arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *fun
*
* @return Status
*/
-arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line,
- const Coordinates &pos, unsigned int max_dim);
+arm_compute::Status error_on_coordinates_dimensions_gte(
+ const char *function, const char *file, const int line, const Coordinates &pos, unsigned int max_dim);
#define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md))
#define ARM_COMPUTE_RETURN_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \
@@ -258,8 +258,8 @@ arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, co
*
* @return Status
*/
-arm_compute::Status error_on_window_dimensions_gte(const char *function, const char *file, const int line,
- const Window &win, unsigned int max_dim);
+arm_compute::Status error_on_window_dimensions_gte(
+ const char *function, const char *file, const int line, const Window &win, unsigned int max_dim);
#define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md))
#define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \
@@ -277,16 +277,82 @@ arm_compute::Status error_on_window_dimensions_gte(const char *function, const c
* @return Status
*/
template <typename T, typename... Ts>
-arm_compute::Status error_on_mismatching_dimensions(const char *function, const char *file, int line,
- const Dimensions<T> &dim1, const Dimensions<T> &dim2, Ts &&... dims)
+arm_compute::Status error_on_mismatching_dimensions(const char *function,
+ const char *file,
+ int line,
+ const Dimensions<T> &dim1,
+ const Dimensions<T> &dim2,
+ Ts &&...dims)
{
- ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2, std::forward<Ts>(dims)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2,
+ std::forward<Ts>(dims)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+
+/** Return true if the given format has horizontal subsampling.
+ *
+ * @param[in] format Format to determine subsampling.
+ *
+ * @return True if the format can be subsampled horizontally.
+ */
+inline bool has_format_horizontal_subsampling(Format format)
+{
+ return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 ||
+ format == Format::NV21 || format == Format::IYUV || format == Format::UV88)
+ ? true
+ : false;
+}
+
+/** Return true if the given format has vertical subsampling.
+ *
+ * @param[in] format Format to determine subsampling.
+ *
+ * @return True if the format can be subsampled vertically.
+ */
+inline bool has_format_vertical_subsampling(Format format)
+{
+ return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88)
+ ? true
+ : false;
+}
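+
+// For instance, YUYV422 (4:2:2) halves the chroma resolution only along the width:
+// has_format_horizontal_subsampling(Format::YUYV422) is true, while
+// has_format_vertical_subsampling(Format::YUYV422) is false.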
+
+/** Adjust a tensor shape if its width or height is odd for a given multi-planar format. No modification is done for other formats.
+ *
+ * @note A few links discussing the issue of odd sizes, all sharing the same solution:
+ * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a>
+ * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a>
+ * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&amp;can=1&amp;q=odd%20width">libYUV</a>
+ * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a> *
+ *
+ * @param[in, out] shape Tensor shape of 2D size
+ * @param[in] format Format of the tensor
+ *
+ * @return The adjusted tensor shape.
+ */
+inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
+{
+ TensorShape output{shape};
+
+ // Force width to be even for formats which require subsampling of the U and V channels
+ if (has_format_horizontal_subsampling(format))
+ {
+ output.set(0, (output.x() + 1) & ~1U);
+ }
+
+ // Force height to be even for formats which require subsampling of the U and V channels
+ if (has_format_vertical_subsampling(format))
+ {
+ output.set(1, (output.y() + 1) & ~1U);
+ }
+
+ return output;
+}
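+
+// Example (editor's sketch with a hypothetical shape): NV12 subsamples both axes,
+// so a 639x479 shape is rounded up to even dimensions:
+//
+//     const TensorShape even = adjust_odd_shape(TensorShape(639U, 479U), Format::NV12); // 640x480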
/** Return an error if the passed tensor objects are not even.
*
@@ -300,18 +366,20 @@ arm_compute::Status error_on_mismatching_dimensions(const char *function, const
* @return Status
*/
template <typename... Ts>
-arm_compute::Status error_on_tensors_not_even(const char *function, const char *file, int line,
- const Format &format, const ITensor *tensor1, Ts... tensors)
+arm_compute::Status error_on_tensors_not_even(
+ const char *function, const char *file, int line, const Format &format, const ITensor *tensor1, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor)
- {
- const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format);
- return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2);
- }),
- function, file, line, "Tensor shape has odd dimensions");
+ const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(),
+ [&](const ITensor *tensor)
+ {
+ const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format);
+ return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2);
+ }),
+ function, file, line, "Tensor shape has odd dimensions");
return arm_compute::Status{};
}
@@ -320,6 +388,38 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char *
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_EVEN(...) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_even(__func__, __FILE__, __LINE__, __VA_ARGS__))
+/** Calculate subsampled shape for a given format and channel
+ *
+ * @param[in] shape Shape of the tensor to calculate the extracted channel.
+ * @param[in] format Format of the tensor.
+ * @param[in] channel Channel to create tensor shape to be extracted.
+ *
+ * @return The subsampled tensor shape.
+ */
+inline TensorShape
+calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
+{
+ TensorShape output{shape};
+
+    // Subsample the shape only for the U or V channel (or when the channel is unknown)
+ if (Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
+ {
+ // Subsample width for the tensor shape when channel is U or V
+ if (has_format_horizontal_subsampling(format))
+ {
+ output.set(0, output.x() / 2U);
+ }
+
+ // Subsample height for the tensor shape when channel is U or V
+ if (has_format_vertical_subsampling(format))
+ {
+ output.set(1, output.y() / 2U);
+ }
+ }
+
+ return output;
+}
+
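+// Example (editor's sketch): the U plane of a 720x480 IYUV image is subsampled on
+// both axes, yielding 360x240:
+//
+//     calculate_subsampled_shape(TensorShape(720U, 480U), Format::IYUV, Channel::U);
+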
/** Return an error if the passed tensor objects are not sub-sampled.
*
* @param[in] function Function in which the error occurred.
@@ -333,25 +433,32 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char *
* @return Status
*/
template <typename... Ts>
-arm_compute::Status error_on_tensors_not_subsampled(const char *function, const char *file, int line,
- const Format &format, const TensorShape &shape, const ITensor *tensor1, Ts... tensors)
+arm_compute::Status error_on_tensors_not_subsampled(const char *function,
+ const char *file,
+ int line,
+ const Format &format,
+ const TensorShape &shape,
+ const ITensor *tensor1,
+ Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- const TensorShape sub2_shape = calculate_subsampled_shape(shape, format);
- const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor)
- {
- return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2);
- }),
- function, file, line, "Tensor shape has mismatch dimensions for sub-sampling");
+ const TensorShape sub2_shape = calculate_subsampled_shape(shape, format);
+ const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(),
+ [&](const ITensor *tensor)
+ { return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); }),
+ function, file, line, "Tensor shape has mismatch dimensions for sub-sampling");
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed two tensor infos have different shapes from the given dimension
*
@@ -365,10 +472,15 @@ arm_compute::Status error_on_tensors_not_subsampled(const char *function, const
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
- return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)...);
+ return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2,
+ std::forward<Ts>(tensor_infos)...);
}
/** Return an error if the passed two tensors have different shapes from the given dimension
*
@@ -382,8 +494,12 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
return error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward<Ts>(tensors)...);
}
@@ -400,19 +516,28 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- unsigned int upper_dim, const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ unsigned int upper_dim,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_2 == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), [&](const ITensorInfo * tensor_info)
- {
- return detail::have_different_dimensions((*tensors_info_array.cbegin())->tensor_shape(), tensor_info->tensor_shape(), upper_dim);
- }),
- function, file, line, "Tensors have different shapes");
+ const std::array<const ITensorInfo *, 2 + sizeof...(Ts)> tensors_info_array{
+ {tensor_info_1, tensor_info_2, tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(),
+ [&](const ITensorInfo *tensor_info)
+ {
+ return detail::have_different_dimensions(
+ (*tensors_info_array.cbegin())->tensor_shape(),
+ tensor_info->tensor_shape(), upper_dim);
+ }),
+ function, file, line, "Tensors have different shapes");
return arm_compute::Status{};
}
/** Return an error if the passed two tensors have different shapes from the given dimension
@@ -428,14 +553,20 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ unsigned int upper_dim,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_2 == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(),
+ detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) \
@@ -454,19 +585,18 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_data_layouts(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- DataLayout &&tensor_data_layout = tensor_info->data_layout();
- const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
- {
- return tensor_info_obj->data_layout() != tensor_data_layout;
- }),
- function, file, line, "Tensors have different data layouts");
+ DataLayout &&tensor_data_layout = tensor_info->data_layout();
+ const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(),
+ [&](const ITensorInfo *tensor_info_obj)
+ { return tensor_info_obj->data_layout() != tensor_data_layout; }),
+ function, file, line, "Tensors have different data layouts");
return arm_compute::Status{};
}
/** Return an error if the passed tensors have different data layouts
@@ -480,19 +610,21 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line,
- const ITensor *tensor, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_data_layouts(
+ const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(function, file, line, tensor->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(
+ function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed two tensor infos have different data types
*
@@ -505,19 +637,18 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_data_types(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- DataType &&tensor_data_type = tensor_info->data_type();
- const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
- {
- return tensor_info_obj->data_type() != tensor_data_type;
- }),
- function, file, line, "Tensors have different data types");
+ DataType &&tensor_data_type = tensor_info->data_type();
+ const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(),
+ [&](const ITensorInfo *tensor_info_obj)
+ { return tensor_info_obj->data_type() != tensor_data_type; }),
+ function, file, line, "Tensors have different data types");
return arm_compute::Status{};
}
/** Return an error if the passed two tensors have different data types
@@ -531,19 +662,21 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line,
- const ITensor *tensor, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_data_types(
+ const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(function, file, line, tensor->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(
+ function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
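+
+// These validators compose into the usual static validate() pattern (editor's
+// sketch, not part of this change; ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR is the
+// macro defined alongside error_on_nullptr above):
+//
+//     Status validate(const ITensorInfo *src, const ITensorInfo *dst)
+//     {
+//         ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+//         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+//         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
+//         return Status{};
+//     }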
/** Return an error if the passed tensor infos have different asymmetric quantized data types or different quantization info
*
@@ -559,28 +692,32 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_quantization_info(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
DataType &&first_data_type = tensor_info_1->data_type();
const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info();
- if(!is_data_type_quantized(first_data_type))
+ if (!is_data_type_quantized(first_data_type))
{
return arm_compute::Status{};
}
- const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->data_type() != first_data_type;
- }),
- function, file, line, "Tensors have different asymmetric quantized data types");
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->quantization_info() != first_quantization_info;
- }),
- function, file, line, "Tensors have different quantization information");
+ const std::array<const ITensorInfo *, 1 + sizeof...(Ts)> tensor_infos_array{
+ {tensor_info_2, std::forward<Ts>(tensor_infos)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(),
+ [&](const ITensorInfo *tensor_info)
+ { return tensor_info->data_type() != first_data_type; }),
+ function, file, line, "Tensors have different asymmetric quantized data types");
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(),
+ [&](const ITensorInfo *tensor_info)
+ { return tensor_info->quantization_info() != first_quantization_info; }),
+ function, file, line, "Tensors have different quantization information");
return arm_compute::Status{};
}
@@ -598,17 +735,24 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_quantization_info(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(),
+ detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided.
*
@@ -620,8 +764,8 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu
* @param[in] formats (Optional) Further allowed formats.
*/
template <typename T, typename F, typename... Fs>
-void error_on_format_not_in(const char *function, const char *file, const int line,
- const T *object, F &&format, Fs &&... formats)
+void error_on_format_not_in(
+ const char *function, const char *file, const int line, const T *object, F &&format, Fs &&...formats)
{
ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line);
@@ -630,17 +774,17 @@ void error_on_format_not_in(const char *function, const char *file, const int li
ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line);
- const std::array<F, sizeof...(Fs)> formats_array{ { std::forward<Fs>(formats)... } };
+ const std::array<F, sizeof...(Fs)> formats_array{{std::forward<Fs>(formats)...}};
ARM_COMPUTE_UNUSED(formats_array);
- ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f)
- {
- return f == object_format;
- }),
- function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str());
+ ARM_COMPUTE_ERROR_ON_LOC_MSG(
+ object_format != format &&
+ std::none_of(formats_array.begin(), formats_array.end(), [&](const F &f) { return f == object_format; }),
+ function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str());
ARM_COMPUTE_UNUSED(function, format, file, line);
}
-#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)
+#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) \
+ ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)
/** Return an error if the data type of the passed tensor info does not match any of the data types provided.
*
@@ -654,20 +798,19 @@ void error_on_format_not_in(const char *function, const char *file, const int li
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_not_in(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dt, Ts &&...dts)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
const DataType &tensor_dt = tensor_info->data_type(); //NOLINT
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line);
- const std::array<T, sizeof...(Ts)> dts_array{ { std::forward<Ts>(dts)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d)
- {
- return d == tensor_dt;
- }),
- function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str());
+ const std::array<T, sizeof...(Ts)> dts_array{{std::forward<Ts>(dts)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(
+ tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T &d) { return d == tensor_dt; }),
+ function, file, line, "ITensor data type %s not supported by this kernel",
+ string_from_data_type(tensor_dt).c_str());
return arm_compute::Status{};
}
/** Return an error if the data type of the passed tensor does not match any of the data types provided.
@@ -682,11 +825,12 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line,
- const ITensor *tensor, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_not_in(
+ const char *function, const char *file, const int line, const ITensor *tensor, T &&dt, Ts &&...dts)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(
+ function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t, ...) \
@@ -706,20 +850,19 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, T &&dl, Ts &&... dls)
+inline arm_compute::Status error_on_data_layout_not_in(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dl, Ts &&...dls)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
const DataLayout &tensor_dl = tensor_info->data_layout(); //NOLINT
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dl == DataLayout::UNKNOWN, function, file, line);
- const std::array<T, sizeof...(Ts)> dls_array{ { std::forward<Ts>(dls)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T & l)
- {
- return l == tensor_dl;
- }),
- function, file, line, "ITensor data layout %s not supported by this kernel", string_from_data_layout(tensor_dl).c_str());
+ const std::array<T, sizeof...(Ts)> dls_array{{std::forward<Ts>(dls)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(
+ tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T &l) { return l == tensor_dl; }),
+ function, file, line, "ITensor data layout %s not supported by this kernel",
+ string_from_data_layout(tensor_dl).c_str());
return arm_compute::Status{};
}
/** Return an error if the data layout of the passed tensor does not match any of the data layout provided.
@@ -734,17 +877,19 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line,
- const ITensor *tensor, T &&dl, Ts &&... dls)
+inline arm_compute::Status error_on_data_layout_not_in(
+ const char *function, const char *file, const int line, const ITensor *tensor, T &&dl, Ts &&...dls)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(
+ function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__))
/** Return an error if the data type or the number of channels of the passed tensor info does not match any of the data types and number of channels provided.
*
@@ -759,12 +904,20 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, size_t num_channels, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_channel_not_in(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info,
+ size_t num_channels,
+ T &&dt,
+ Ts &&...dts)
{
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(
+ function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...));
const size_t tensor_nc = tensor_info->num_channels();
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, "Number of channels %zu. Required number of channels %zu", tensor_nc, num_channels);
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line,
+ "Number of channels %zu. Required number of channels %zu", tensor_nc,
+ num_channels);
return arm_compute::Status{};
}
/** Return an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided.
@@ -780,17 +933,25 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line,
- const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_channel_not_in(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor,
+ size_t num_channels,
+ T &&dt,
+ Ts &&...dts)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, std::forward<T>(dt), std::forward<Ts>(dts)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels,
+ std::forward<T>(dt), std::forward<Ts>(dts)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
/** Return an error if the data type of the passed tensor info is FP16 and FP16 extension is not supported by the device.
*
@@ -802,12 +963,12 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio
*
* @return Status
*/
-inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, bool is_fp16_supported)
+inline arm_compute::Status error_on_unsupported_fp16(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, bool is_fp16_supported)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported),
- function, file, line, "FP16 not supported by the device");
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), function,
+ file, line, "FP16 not supported by the device");
return arm_compute::Status{};
}
@@ -821,11 +982,12 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const
*
* @return Status
*/
-inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line,
- const ITensor *tensor, bool is_fp16_supported)
+inline arm_compute::Status error_on_unsupported_fp16(
+ const char *function, const char *file, const int line, const ITensor *tensor, bool is_fp16_supported)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported));
return arm_compute::Status{};
}
@@ -838,8 +1000,8 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const
*
* @return Status
*/
-arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
- const ITensor *tensor);
+arm_compute::Status
+error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensor *tensor);
/** Return an error if the tensor info is not 2D.
*
@@ -850,8 +1012,8 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil
*
* @return Status
*/
-arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
- const ITensorInfo *tensor);
+arm_compute::Status
+error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensorInfo *tensor);
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t))
@@ -870,17 +1032,15 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_channel_not_in(const char *function, const char *file, const int line,
- T cn, T &&channel, Ts &&... channels)
+inline arm_compute::Status
+error_on_channel_not_in(const char *function, const char *file, const int line, T cn, T &&channel, Ts &&...channels)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line);
- const std::array<T, sizeof...(Ts)> channels_array{ { std::forward<Ts>(channels)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f)
- {
- return f == cn;
- }),
- function, file, line);
+ const std::array<T, sizeof...(Ts)> channels_array{{std::forward<Ts>(channels)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(),
+ [&](const T &f) { return f == cn; }),
+ function, file, line);
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) \
@@ -898,35 +1058,13 @@ inline arm_compute::Status error_on_channel_not_in(const char *function, const c
*
* @return Status
*/
-arm_compute::Status error_on_channel_not_in_known_format(const char *function, const char *file, const int line,
- Format fmt, Channel cn);
+arm_compute::Status
+error_on_channel_not_in_known_format(const char *function, const char *file, const int line, Format fmt, Channel cn);
#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c))
#define ARM_COMPUTE_RETURN_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c))
-/** Return an error if the @ref IMultiHOG container is invalid
- *
- * An @ref IMultiHOG container is invalid if:
- *
- * -# it is a nullptr
- * -# it doesn't contain models
- * -# it doesn't have the HOG data objects with the same phase_type, normalization_type and l2_hyst_threshold (if normalization_type == L2HYS_NORM)
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] multi_hog IMultiHOG container to validate
- *
- * @return Status
- */
-arm_compute::Status error_on_invalid_multi_hog(const char *function, const char *file, const int line,
- const IMultiHOG *multi_hog);
-#define ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(m) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_multi_hog(__func__, __FILE__, __LINE__, m))
-#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_MULTI_HOG(m) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_multi_hog(__func__, __FILE__, __LINE__, m))
-
/** Return an error if the kernel is not configured.
*
* @param[in] function Function in which the error occurred.
@@ -936,8 +1074,8 @@ arm_compute::Status error_on_invalid_multi_hog(const char *function, const char
*
* @return Status
*/
-arm_compute::Status error_on_unconfigured_kernel(const char *function, const char *file, const int line,
- const IKernel *kernel);
+arm_compute::Status
+error_on_unconfigured_kernel(const char *function, const char *file, const int line, const IKernel *kernel);
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k))
#define ARM_COMPUTE_RETURN_ERROR_ON_UNCONFIGURED_KERNEL(k) \
@@ -954,8 +1092,12 @@ arm_compute::Status error_on_unconfigured_kernel(const char *function, const cha
*
* @return Status
*/
-arm_compute::Status error_on_invalid_subtensor(const char *function, const char *file, const int line,
- const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape);
+arm_compute::Status error_on_invalid_subtensor(const char *function,
+ const char *file,
+ const int line,
+ const TensorShape &parent_shape,
+ const Coordinates &coords,
+ const TensorShape &shape);
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \
@@ -971,11 +1113,16 @@ arm_compute::Status error_on_invalid_subtensor(const char *function, const char
*
* @return Status
*/
-arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line,
- const ValidRegion &parent_valid_region, const ValidRegion &valid_region);
+arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function,
+ const char *file,
+ const int line,
+ const ValidRegion &parent_valid_region,
+ const ValidRegion &valid_region);
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
-}
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
+} // namespace arm_compute
#endif /* ARM_COMPUTE_VALIDATE_H*/
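
The reflowed validation macros keep their original call signatures, so caller code is unaffected. A hypothetical caller-side sketch (the validator name and the 2-channel F16/F32 constraint are illustrative, not part of this patch):

#include "arm_compute/core/Validate.h"

// Hypothetical validator: accept only 2-channel F16/F32 2D tensors.
// On failure the macros return a populated arm_compute::Status to the caller.
arm_compute::Status validate_my_kernel(const arm_compute::ITensor *input)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, arm_compute::DataType::F16,
                                                         arm_compute::DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
    return arm_compute::Status{};
}
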
diff --git a/arm_compute/core/Version.h b/arm_compute/core/Version.h
index be3f0264bb..44d400bad8 100644
--- a/arm_compute/core/Version.h
+++ b/arm_compute/core/Version.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 ARM Limited.
+ * Copyright (c) 2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,12 +27,12 @@
#include <string>
/* Macro utilities */
-#define STRINGIFY2(s) #s
-#define STRINGIFY(s) STRINGIFY2(s)
+#define ARM_COMPUTE_STRINGIFY2(s) #s
+#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s)
-#define ARM_COMPUTE_VERSION_STR \
- STRINGIFY(ARM_COMPUTE_VERSION_MAJOR) \
- "." STRINGIFY(ARM_COMPUTE_VERSION_MINOR) "." STRINGIFY(ARM_COMPUTE_VERSION_PATCH)
+#define ARM_COMPUTE_VERSION_STR \
+ ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_MAJOR) \
+ "." ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_MINOR) "." ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_PATCH)
namespace arm_compute
{
@@ -45,4 +45,7 @@ namespace arm_compute
std::string build_information();
} // namespace arm_compute
+#undef ARM_COMPUTE_STRINGIFY
+#undef ARM_COMPUTE_STRINGIFY2
+
#endif /* ARM_COMPUTE_LIBRARY_VERSION_H */
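
The prefixed stringify helpers follow the usual two-level idiom, and the trailing #undef keeps them from leaking into translation units that include the header. A minimal standalone sketch (illustrative macro names, not the library's) of why the second level is needed:

#include <cstdio>

#define MY_STRINGIFY2(s) #s              // stringifies the literal argument
#define MY_STRINGIFY(s) MY_STRINGIFY2(s) // extra level: argument expands first

#define MY_VERSION_MAJOR 23

int main()
{
    std::printf("%s\n", MY_STRINGIFY2(MY_VERSION_MAJOR)); // prints "MY_VERSION_MAJOR"
    std::printf("%s\n", MY_STRINGIFY(MY_VERSION_MAJOR));  // prints "23"
}
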
diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h
index d6690d484a..e93d2863c9 100644
--- a/arm_compute/core/Window.h
+++ b/arm_compute/core/Window.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,17 +21,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_WINDOW_H
-#define ARM_COMPUTE_WINDOW_H
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
+#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_H
+#define ACL_ARM_COMPUTE_CORE_WINDOW_H
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/math/Math.h"
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
namespace arm_compute
{
@@ -45,6 +45,10 @@ public:
static constexpr size_t DimY = 1;
/** Alias for dimension 2 also known as Z dimension */
static constexpr size_t DimZ = 2;
+ /** Alias for dimension 3 also known as W dimension */
+ static constexpr size_t DimW = 3;
+ /** Alias for dimension 4 also known as V dimension */
+ static constexpr size_t DimV = 4;
/** Default constructor: create a window containing a single element. */
constexpr Window()
@@ -82,10 +86,10 @@ public:
* @param[in] step Step between two elements of the dimension when iterating.
*
*/
- constexpr Dimension(int start = 0, int end = 1, int step = 1)
- : _start(start), _end(end), _step(step)
+ constexpr Dimension(int start = 0, int end = 1, int step = 1) : _start(start), _end(end), _step(step)
{
}
+ Dimension(const Dimension &d) = default;
/** Default assignment operator to allow dimensions to be copied */
Dimension &operator=(const Dimension &d) = default;
/** Return the start of the dimension */
@@ -119,6 +123,17 @@ public:
{
_end = end;
}
+ /** Check whether two Dimensions are equal.
+ *
+ * @param[in] lhs LHS Dimensions
+ * @param[in] rhs RHS Dimensions
+ *
+ * @return True if the Dimensions are the same.
+ */
+ friend bool operator==(const Dimension &lhs, const Dimension &rhs)
+ {
+ return (lhs._start == rhs._start) && (lhs._end == rhs._end) && (lhs._step == rhs._step);
+ }
private:
int _start; /**< Start of the dimension */
@@ -198,15 +213,17 @@ public:
*/
void shift(size_t dimension, int shift_value);
- /** Shift down all the dimensions of a window
+ /** Shift down all the dimensions of a window starting from the specified dimension.
*
- * i.e new_dims[n] = old_dims[n+shift_value].
+ * new_dims[i] = old_dims[i] for all i < start_dim.
+ * new_dims[i] = old_dims[i+shift_value] for all i >= start_dim.
*
* @param[in] shift_value Number of dimensions to shift the window by.
+ * @param[in] start_dim The dimension from which the dimensions start to shift.
*
* @return The window with the shifted dimensions.
*/
- Window shift_dimensions(unsigned int shift_value) const;
+ Window shift_dimensions(unsigned int shift_value, unsigned int start_dim = 0) const;
/** Adjust the start or end of a given dimension by the given value
*
@@ -346,7 +363,6 @@ public:
{
return slide_window_slice<4>(slice);
}
-
/** Collapse the dimensions between @p first and @p last if possible.
*
* A dimension is collapsable if it starts from 0 and matches the corresponding dimension in the full_window
@@ -358,7 +374,8 @@ public:
*
* @return Collapsed window.
*/
- Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const;
+ Window
+ collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const;
/** Collapse the dimensions higher than @p first if possible.
*
@@ -411,6 +428,14 @@ public:
* @param[in] rhs Second window to swap.
*/
friend void swap(Window &lhs, Window &rhs);
+ /** Check whether two Windows are equal.
+ *
+ * @param[in] lhs LHS window
+ * @param[in] rhs RHS window
+ *
+ * @return True if the given windows are the same.
+ */
+ friend bool operator==(const Window &lhs, const Window &rhs);
private:
/** First slice of the window
@@ -418,7 +443,7 @@ private:
* @return The first slice of the window.
*/
template <unsigned int window_dimension>
- Window first_slice_window() const;
+ Window first_slice_window() const;
/** Slide the passed window slice.
*
@@ -437,4 +462,4 @@ private:
};
} // namespace arm_compute
#include "Window.inl"
-#endif /*ARM_COMPUTE_WINDOW_H */
+#endif // ACL_ARM_COMPUTE_CORE_WINDOW_H
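
A standalone sketch of the extended shift_dimensions(shift_value, start_dim) contract documented above, modelled on plain arrays (an illustrative helper, not the library implementation):

#include <array>
#include <cstddef>

// new_dims[i] = old_dims[i]               for i <  start_dim
// new_dims[i] = old_dims[i + shift_value] for i >= start_dim
template <std::size_t N>
std::array<int, N> shift_dims(const std::array<int, N> &old_dims,
                              unsigned int shift_value, unsigned int start_dim)
{
    std::array<int, N> new_dims{}; // vacated upper slots stay value-initialised
    std::size_t n = 0;
    for (; n < start_dim; ++n)
        new_dims[n] = old_dims[n];                 // dims below start_dim are kept
    for (; n + shift_value < N; ++n)
        new_dims[n] = old_dims[n + shift_value];   // higher dims are shifted down
    return new_dims;
}
// e.g. shift_dims<4>({2, 3, 4, 5}, /*shift_value=*/1, /*start_dim=*/1) yields {2, 4, 5, 0}
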
diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl
index 70c4f80ac2..0f7c4fbdd7 100644
--- a/arm_compute/core/Window.inl
+++ b/arm_compute/core/Window.inl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,12 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
+#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_INL
+#define ACL_ARM_COMPUTE_CORE_WINDOW_INL
+
namespace arm_compute
{
inline Window::Window(const Window &src)
: _dims(), _is_broadcasted(utility::generate_array<bool, Coordinates::num_max_dimensions, false>::value)
{
- for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
{
set(i, src[i]);
_is_broadcasted[i] = src.is_broadcasted(i);
@@ -65,32 +69,34 @@ inline bool Window::is_broadcasted(size_t dimension) const
return _is_broadcasted[dimension];
}
-inline Window Window::collapse_if_possible(const Window &full_window, const size_t first,
- const size_t last, bool *has_collapsed) const
+inline Window Window::collapse_if_possible(const Window &full_window,
+ const size_t first,
+ const size_t last,
+ bool *has_collapsed) const
{
Window collapsed(*this);
bool is_collapsable = true;
int collapsed_end = _dims[first].end();
- for(size_t d = first + 1; is_collapsable && (d < last); ++d)
+ for (size_t d = first + 1; is_collapsable && (d < last); ++d)
{
// The _dims's dimension must match the full _dims dimension to be collapsable:
- is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1)
- && (full_window[d].end() == _dims[d].end());
+ is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) &&
+ (full_window[d].end() == _dims[d].end());
collapsed_end *= _dims[d].end();
}
- if(is_collapsable)
+ if (is_collapsable)
{
collapsed._dims.at(first).set_end(collapsed_end);
- for(size_t d = first + 1; is_collapsable && (d < last); ++d)
+ for (size_t d = first + 1; is_collapsable && (d < last); ++d)
{
collapsed.set(d, Dimension());
}
}
- if(has_collapsed != nullptr)
+ if (has_collapsed != nullptr)
{
*has_collapsed = is_collapsable;
}
@@ -98,13 +104,21 @@ inline Window Window::collapse_if_possible(const Window &full_window, const size
return collapsed;
}
-inline Window Window::shift_dimensions(unsigned int shift_value) const
+inline Window Window::shift_dimensions(unsigned int shift_value, unsigned int start_dim) const
{
Window shifted_window;
- for(size_t n = 0; n < (Coordinates::num_max_dimensions - shift_value); n++)
+ size_t n = 0;
+
+ for (; n < start_dim; ++n)
+ {
+ shifted_window.set(n, _dims[n]);
+ }
+
+ for (; n < (Coordinates::num_max_dimensions - shift_value); n++)
{
shifted_window.set(n, _dims[n + shift_value]);
}
+
return shifted_window;
}
@@ -120,9 +134,9 @@ inline Window Window::collapse(const Window &full_window, const size_t first, co
inline Window Window::broadcast_if_dimension_le_one(const TensorShape &shape) const
{
Window broadcastWin(*this);
- for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
+ for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
{
- if(shape[d] <= 1)
+ if (shape[d] <= 1)
{
broadcastWin.set_broadcasted(d);
}
@@ -142,7 +156,7 @@ inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start)
ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
Window::Dimension &d = _dims[dimension];
- if(is_at_start)
+ if (is_at_start)
{
d = Window::Dimension(d.start() + adjust_value, d.end(), d.step());
}
@@ -172,7 +186,7 @@ inline void Window::set_dimension_step(size_t dimension, int step)
inline void Window::validate() const
{
- for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
{
ARM_COMPUTE_ERROR_ON(_dims[i].end() < _dims[i].start());
ARM_COMPUTE_ERROR_ON((_dims[i].step() != 0) && (((_dims[i].end() - _dims[i].start()) % _dims[i].step()) != 0));
@@ -193,21 +207,21 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co
Window out;
- for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
+ for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
{
- if(d == dimension)
+ if (d == dimension)
{
- int start = _dims[d].start();
- int end = _dims[d].end();
- const int step = _dims[d].step();
+ int start = _dims[d].start();
+ int end = _dims[d].end();
+ const int step = _dims[d].step();
const int num_it = num_iterations(d);
const int rem = num_it % total;
- int work = num_it / total;
+ int work = num_it / total;
- int it_start = work * id;
+ int it_start = work * id;
- if(int(id) < rem)
+ if (int(id) < rem)
{
++work;
it_start += id;
@@ -234,18 +248,18 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co
template <unsigned int window_dimension>
inline bool Window::slide_window_slice(Window &slice) const
{
- for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
+ for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
{
// Did we reach the end of this dimension?
const int v = slice._dims[n].start() + 1;
- if(v < _dims[n].end())
+ if (v < _dims[n].end())
{
// No: increment
slice._dims[n] = Dimension(v, v + 1, 1);
// Reset lower dimensions:
- for(unsigned int lower = window_dimension; lower < n; ++lower)
+ for (unsigned int lower = window_dimension; lower < n; ++lower)
{
slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1);
}
@@ -258,14 +272,14 @@ inline bool Window::slide_window_slice(Window &slice) const
}
template <unsigned int window_dimension>
-inline Window Window::first_slice_window() const
+inline Window Window::first_slice_window() const
{
Window slice;
std::copy_n(_dims.begin(), window_dimension, slice._dims.begin());
//Initialise higher dimensions to be the first slice.
- for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
+ for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
{
slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1);
}
@@ -275,16 +289,16 @@ inline Window Window::first_slice_window() const
inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first_dimension)
{
- for(unsigned int n = first_dimension; n < shape.num_dimensions(); ++n)
+ for (unsigned int n = first_dimension; n < shape.num_dimensions(); ++n)
{
- set(n, Window::Dimension(0, std::max(shape[n], static_cast<uint32_t>(1))));
+ set(n, Window::Dimension(0, std::max(shape[n], static_cast<size_t>(1))));
}
}
inline TensorShape Window::shape() const
{
TensorShape shape;
- for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
+ for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
{
shape.set(d, (_dims[d].end() - _dims[d].start()) / _dims[d].step());
}
@@ -294,7 +308,7 @@ inline TensorShape Window::shape() const
inline size_t Window::num_iterations_total() const
{
size_t total = 1;
- for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
+ for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
{
total *= num_iterations(d);
}
@@ -305,4 +319,11 @@ inline void swap(Window &lhs, Window &rhs)
{
lhs._dims.swap(rhs._dims);
}
+
+inline bool operator==(const Window &lhs, const Window &rhs)
+{
+ return (lhs._dims == rhs._dims) && (lhs._is_broadcasted == rhs._is_broadcasted);
+}
} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_CORE_WINDOW_INL
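
The split_window() hunk above shows only the id < rem branch; assuming the usual remainder distribution for the remaining workers (offset by rem), the arithmetic plays out as in this standalone sketch:

#include <cstdio>

int main()
{
    const int num_it = 10, total = 4;    // 10 iterations shared by 4 workers
    const int rem = num_it % total;      // the first `rem` workers get one extra iteration
    for (int id = 0; id < total; ++id)
    {
        int work = num_it / total;
        int it_start = work * id;
        if (id < rem)
        {
            ++work;
            it_start += id;
        }
        else
        {
            it_start += rem; // assumed else-branch, cut off in the hunk above
        }
        std::printf("worker %d: iterations [%d, %d)\n", id, it_start, it_start + work);
    }
    // worker 0: [0,3)  worker 1: [3,6)  worker 2: [6,8)  worker 3: [8,10)
}
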
diff --git a/arm_compute/core/WindowIterator.h b/arm_compute/core/WindowIterator.h
index e7d5334fa0..29302c410a 100644
--- a/arm_compute/core/WindowIterator.h
+++ b/arm_compute/core/WindowIterator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,10 +28,6 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Window.h"
-//FIXME: Delete the "PRINTF" before the release. In the meantime it's probably going to be useful to debug
-//#define PRINTF printf
-#define PRINTF(...)
-
namespace arm_compute
{
/** Convert an offset in window steps into absolute coordinates.
@@ -44,7 +40,7 @@ namespace arm_compute
inline Coordinates convert_window_coord_to_position(const Window &w, const Coordinates &offset)
{
Coordinates position;
- for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
{
position.set(i, w[i].start() + offset[i] * w[i].step());
}
@@ -168,16 +164,14 @@ public:
template <typename M>
void iterate_3D(M &&on_new_row_size)
{
- while(_end.z() != _position.z())
+ while (_end.z() != _position.z())
{
- PRINTF("New slice %d\n", _position.z());
iterate_2D_internal(on_new_row_size, _w.x().end() - _w.x().step(), _w.y().end() - _w.y().step());
_position[2] += _w.z().step();
_position[1] = _w.y().start();
_position[0] = _w.x().start();
}
// Left over:
- PRINTF("Left over slice\n");
iterate_2D(on_new_row_size);
}
@@ -217,29 +211,25 @@ private:
void iterate_2D_internal(M &&on_new_row_size, int end_x, int end_y)
{
//Is there more than one row to process ?
- if(end_y == _position.y())
+ if (end_y == _position.y())
{
- // Single row:
- PRINTF("Partial row only\n");
// Both start and end belong to the same row:
iterate_over_dim0(end_x + _w.x().step(), on_new_row_size);
}
else
{
// Do we start from the beginning of the row ?
- if(_w.x().start() != _position.x())
+ if (_w.x().start() != _position.x())
{
//Start in the middle of a row: process left-over X
- PRINTF("Partial row first\n");
iterate_over_dim0(_w.x().end(), on_new_row_size);
_position[1] += _w.y().step();
}
//Middle rows
bool no_leftover = end_x + _w.x().step() == _w.x().end();
- if(no_leftover)
+ if (no_leftover)
{
- PRINTF("no left over\n");
//Switch to full row size:
on_new_row_size(_w[0].start(), _w.x().end());
// Shouldn't be possible to reach that point and not have at least one entire row to process
@@ -249,17 +239,14 @@ private:
}
else
{
- PRINTF("with left over\n");
// Are there full rows to process ?
- if(_position[1] != end_y)
+ if (_position[1] != end_y)
{
- PRINTF("full rows\n");
//Switch to full row size:
on_new_row_size(_w[0].start(), _w.x().end());
iterate_over_dim1(end_y);
}
- PRINTF("Final leftover\n");
//Leftover end x
_position[0] = _w.x().start();
iterate_over_dim0(end_x + _w.x().step(), on_new_row_size);
@@ -273,7 +260,7 @@ private:
*/
void iterate_over_dim1(int end)
{
- for(; _position[1] != end; _position[1] += _w[1].step())
+ for (; _position[1] != end; _position[1] += _w[1].step())
{
_position[0] = _w[0].start();
iterate_over_dim0(_w[0].end());
@@ -298,10 +285,9 @@ private:
*/
void iterate_over_dim0(int end)
{
- PRINTF("X [%d, %d, %d]\n", _position.x(), end, _w[0].step());
// Both start and end belong to the same row:
ARM_COMPUTE_ERROR_ON(_position[0] > end);
- for(; _position.x() < end; _position[0] += _w[0].step())
+ for (; _position.x() < end; _position[0] += _w[0].step())
{
_lambda_function(_position);
}
@@ -323,9 +309,10 @@ private:
* @return A WindowIterator object.
*/
template <typename L>
-WindowIterator<L> create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function)
+WindowIterator<L>
+create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function)
{
return WindowIterator<L>(w, start, end, std::move(lambda_function));
}
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_WINDOW_ITERATOR_H*/
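
The mapping performed by convert_window_coord_to_position() above is plain per-dimension arithmetic, position = start + offset * step; a one-dimension sketch with made-up values:

#include <cstdio>

int main()
{
    const int start = 4, step = 2; // one window dimension: start 4, step 2
    for (int offset = 0; offset < 3; ++offset)
        std::printf("step offset %d -> absolute coordinate %d\n", offset, start + offset * step);
    // prints 4, 6, 8
}
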
diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h
new file mode 100644
index 0000000000..63a3a1a1ec
--- /dev/null
+++ b/arm_compute/core/experimental/Types.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H
+#define ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H
+
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/TensorShape.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+// Forward declaration
+class ITensor;
+
+/** Memory type */
+enum TensorType : int32_t
+{
+ ACL_UNKNOWN = -1,
+ ACL_SRC_DST = 0,
+
+ // Src
+ ACL_SRC = 0,
+ ACL_SRC_0 = 0,
+ ACL_SRC_1 = 1,
+ ACL_SRC_2 = 2,
+ ACL_SRC_3 = 3,
+ ACL_SRC_4 = 4,
+ ACL_SRC_5 = 5,
+ ACL_SRC_6 = 6,
+ ACL_SRC_END = 6,
+
+ // Dst
+ ACL_DST = 30,
+ ACL_DST_0 = 30,
+ ACL_DST_1 = 31,
+ ACL_DST_2 = 32,
+ ACL_DST_END = 32,
+
+ // Aux
+ ACL_INT = 50,
+ ACL_INT_0 = 50,
+ ACL_INT_1 = 51,
+ ACL_INT_2 = 52,
+ ACL_INT_3 = 53,
+ ACL_INT_4 = 54,
+ ACL_SRC_VEC = 256,
+ ACL_DST_VEC = 512,
+ ACL_INT_VEC = 1024,
+
+ // Aliasing Types
+ // Conv etc
+ ACL_BIAS = ACL_SRC_2,
+
+ // Gemm
+ ACL_VEC_ROW_SUM = ACL_SRC_3,
+ ACL_VEC_COL_SUM = ACL_SRC_4,
+ ACL_SHIFTS = ACL_SRC_5,
+ ACL_MULTIPLIERS = ACL_SRC_6,
+};
+
+namespace experimental
+{
+enum class MemoryLifetime
+{
+ Temporary = 0,
+ Persistent = 1,
+ Prepare = 2,
+};
+struct MemoryInfo
+{
+ MemoryInfo() = default;
+
+ MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept : slot(slot), size(size), alignment(alignment)
+ {
+ }
+
+ MemoryInfo(int slot, MemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept
+ : slot(slot), lifetime(lifetime), size(size), alignment(alignment)
+ {
+ }
+
+ bool merge(int slot, size_t new_size, size_t new_alignment = 0) noexcept
+ {
+ if (slot != this->slot)
+ {
+ return false;
+ }
+
+ size = std::max(size, new_size);
+ alignment = std::max(alignment, new_alignment);
+
+ return true;
+ }
+
+ int slot{ACL_UNKNOWN};
+ MemoryLifetime lifetime{MemoryLifetime::Temporary};
+ size_t size{0};
+ size_t alignment{64};
+};
+
+using MemoryRequirements = std::vector<MemoryInfo>;
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H
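
A hypothetical caller-side sketch of MemoryInfo::merge() as defined above: requests for the same slot coalesce by keeping the larger size and alignment, while a mismatched slot is refused and leaves the record untouched.

#include "arm_compute/core/experimental/Types.h"

#include <cassert>

int main()
{
    using namespace arm_compute;
    experimental::MemoryInfo info(ACL_INT_0, /*size=*/1024, /*alignment=*/16);

    // Same slot: size and alignment are coalesced by taking the maximum.
    const bool merged = info.merge(ACL_INT_0, /*new_size=*/4096, /*new_alignment=*/64);
    assert(merged && info.size == 4096 && info.alignment == 64);

    // Different slot: the merge is refused.
    assert(!info.merge(ACL_INT_1, /*new_size=*/128));
    return 0;
}
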
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/qmov.h b/arm_compute/core/utils/ActivationFunctionUtils.h
index bb64bef1e9..c988efa256 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/qmov.h
+++ b/arm_compute/core/utils/ActivationFunctionUtils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,29 +21,21 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_WRAPPER_QMOV_H
-#define ARM_COMPUTE_WRAPPER_QMOV_H
+#ifndef ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H
-#include <arm_neon.h>
+#include "arm_compute/core/Types.h"
-namespace arm_compute
-{
-namespace wrapper
-{
-template <typename T>
-inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8x8_t>::type
-vqmov(const int16x8_t &a)
-{
- return vqmovun_s16(a);
-}
+#include <string>
-template <typename T>
-inline typename std::enable_if<std::is_same<T, int8_t>::value, int8x8_t>::type
-vqmov(const int16x8_t &a)
+namespace arm_compute
{
- return vqmovn_s16(a);
-}
-
-} // namespace wrapper
+/** Translates a given activation function to a string.
+ *
+ * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
+ *
+ * @return The string describing the activation function.
+ */
+const std::string &string_from_activation_func(const ActivationFunction &act);
} // namespace arm_compute
-#endif /* ARM_COMPUTE_WRAPPER_QMOV_H */
+#endif /*ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H */
diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/utils/DataLayoutUtils.h
index c09972353c..61839c9f91 100644
--- a/arm_compute/core/NEON/INEKernel.h
+++ b/arm_compute/core/utils/DataLayoutUtils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,14 +21,20 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_INEKERNEL_H
-#define ARM_COMPUTE_INEKERNEL_H
+#ifndef ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H
+#include "arm_compute/core/Types.h"
-#include "arm_compute/core/CPP/ICPPKernel.h"
+#include <string>
namespace arm_compute
{
-/** Common interface for all kernels implemented in NEON. */
-using INEKernel = ICPPKernel;
+/** Convert a data layout identity into a string.
+ *
+ * @param[in] dl @ref DataLayout to be translated to string.
+ *
+ * @return The string describing the data layout.
+ */
+const std::string &string_from_data_layout(DataLayout dl);
} // namespace arm_compute
-#endif /*ARM_COMPUTE_INEKERNEL_H */
+#endif /*ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H */
diff --git a/arm_compute/core/utils/DataTypeUtils.h b/arm_compute/core/utils/DataTypeUtils.h
new file mode 100644
index 0000000000..6fabb19b64
--- /dev/null
+++ b/arm_compute/core/utils/DataTypeUtils.h
@@ -0,0 +1,549 @@
+/*
+ * Copyright (c) 2016-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
+#define ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+/** The size in bytes of the data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t data_size_from_type(DataType data_type)
+{
+ switch (data_type)
+ {
+ case DataType::U8:
+ case DataType::S8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return 1;
+ case DataType::U16:
+ case DataType::S16:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ return 2;
+ case DataType::F32:
+ case DataType::U32:
+ case DataType::S32:
+ return 4;
+ case DataType::F64:
+ case DataType::U64:
+ case DataType::S64:
+ return 8;
+ case DataType::SIZET:
+ return sizeof(size_t);
+ default:
+ ARM_COMPUTE_ERROR("Invalid data type");
+ return 0;
+ }
+}
+
+/** The size in bytes of the data type
+ *
+ * @param[in] dt Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t element_size_from_data_type(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::S8:
+ case DataType::U8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return 1;
+ case DataType::U16:
+ case DataType::S16:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ return 2;
+ case DataType::U32:
+ case DataType::S32:
+ case DataType::F32:
+ return 4;
+ case DataType::U64:
+ case DataType::S64:
+ return 8;
+ default:
+ ARM_COMPUTE_ERROR("Undefined element size for given data type");
+ return 0;
+ }
+}
+
+/** Return the data type used by a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The size in bytes of the pixel format
+ */
+inline DataType data_type_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ case Format::UV88:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return DataType::U8;
+ case Format::U16:
+ return DataType::U16;
+ case Format::S16:
+ return DataType::S16;
+ case Format::U32:
+ return DataType::U32;
+ case Format::S32:
+ return DataType::S32;
+ case Format::BFLOAT16:
+ return DataType::BFLOAT16;
+ case Format::F16:
+ return DataType::F16;
+ case Format::F32:
+ return DataType::F32;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ ARM_COMPUTE_ERROR("Not supported data_type for given format");
+ return DataType::UNKNOWN;
+ }
+}
+
+/** Return the promoted data type of a given data type.
+ *
+ * @note If promoted data type is not supported an error will be thrown
+ *
+ * @param[in] dt Data type to get the promoted type of.
+ *
+ * @return Promoted data type
+ */
+inline DataType get_promoted_data_type(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::U8:
+ return DataType::U16;
+ case DataType::S8:
+ return DataType::S16;
+ case DataType::U16:
+ return DataType::U32;
+ case DataType::S16:
+ return DataType::S32;
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ case DataType::U32:
+ case DataType::S32:
+ case DataType::F32:
+ ARM_COMPUTE_ERROR("Unsupported data type promotions!");
+ default:
+ ARM_COMPUTE_ERROR("Undefined data type!");
+ }
+ return DataType::UNKNOWN;
+}
+
+/** Compute the minimum and maximum values a data type can take
+ *
+ * @param[in] dt Data type to get the min/max bounds of
+ *
+ * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue.
+ */
+inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt)
+{
+ PixelValue min{};
+ PixelValue max{};
+ switch (dt)
+ {
+ case DataType::U8:
+ case DataType::QASYMM8:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()));
+ break;
+ }
+ case DataType::S8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max()));
+ break;
+ }
+ case DataType::U16:
+ case DataType::QASYMM16:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max()));
+ break;
+ }
+ case DataType::S16:
+ case DataType::QSYMM16:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
+ break;
+ }
+ case DataType::U32:
+ {
+ min = PixelValue(std::numeric_limits<uint32_t>::lowest());
+ max = PixelValue(std::numeric_limits<uint32_t>::max());
+ break;
+ }
+ case DataType::S32:
+ {
+ min = PixelValue(std::numeric_limits<int32_t>::lowest());
+ max = PixelValue(std::numeric_limits<int32_t>::max());
+ break;
+ }
+ case DataType::BFLOAT16:
+ {
+ min = PixelValue(bfloat16::lowest());
+ max = PixelValue(bfloat16::max());
+ break;
+ }
+ case DataType::F16:
+ {
+ min = PixelValue(std::numeric_limits<half>::lowest());
+ max = PixelValue(std::numeric_limits<half>::max());
+ break;
+ }
+ case DataType::F32:
+ {
+ min = PixelValue(std::numeric_limits<float>::lowest());
+ max = PixelValue(std::numeric_limits<float>::max());
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Undefined data type!");
+ }
+ return std::make_tuple(min, max);
+}
+
+/** Convert a data type identity into a string.
+ *
+ * @param[in] dt @ref DataType to be translated to string.
+ *
+ * @return The string describing the data type.
+ */
+const std::string &string_from_data_type(DataType dt);
+
+/** Convert a string to DataType
+ *
+ * @param[in] name The name of the data type
+ *
+ * @return DataType
+ */
+DataType data_type_from_name(const std::string &name);
+
+/** Input Stream operator for @ref DataType
+ *
+ * @param[in] stream Stream to parse
+ * @param[out] data_type Output data type
+ *
+ * @return Updated stream
+ */
+inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type)
+{
+ std::string value;
+ stream >> value;
+ data_type = data_type_from_name(value);
+ return stream;
+}
+
+/** Check if a given data type is of floating point type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of floating point type, else false.
+ */
+inline bool is_data_type_float(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::F16:
+ case DataType::F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of quantized type
+ *
+ * @note Quantized is considered a super-set of fixed-point and asymmetric data types.
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of quantized type, else false.
+ */
+inline bool is_data_type_quantized(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of asymmetric quantized type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of asymmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QASYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of asymmetric quantized signed type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of asymmetric quantized signed type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric_signed(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QASYMM8_SIGNED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of 8-bit asymmetric quantized signed type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of 8-bit asymmetric quantized signed type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric_char(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QASYMM8:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of symmetric quantized type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of symmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_symmetric(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QSYMM8:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of per channel type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of per channel type, else false.
+ */
+inline bool is_data_type_quantized_per_channel(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QSYMM8_PER_CHANNEL:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Returns true if the value can be represented by the given data type
+ *
+ * @param[in] val value to be checked
+ * @param[in] dt data type that is checked
+ * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8
+ *
+ * @return true if the data type can hold the value.
+ */
+template <typename T>
+bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo())
+{
+ switch (dt)
+ {
+ case DataType::U8:
+ {
+ const auto val_u8 = static_cast<uint8_t>(val);
+ return ((val_u8 == val) && val >= std::numeric_limits<uint8_t>::lowest() &&
+ val <= std::numeric_limits<uint8_t>::max());
+ }
+ case DataType::QASYMM8:
+ {
+ double min = static_cast<double>(dequantize_qasymm8(0, qinfo));
+ double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo));
+ return ((double)val >= min && (double)val <= max);
+ }
+ case DataType::S8:
+ {
+ const auto val_s8 = static_cast<int8_t>(val);
+ return ((val_s8 == val) && val >= std::numeric_limits<int8_t>::lowest() &&
+ val <= std::numeric_limits<int8_t>::max());
+ }
+ case DataType::U16:
+ {
+ const auto val_u16 = static_cast<uint16_t>(val);
+ return ((val_u16 == val) && val >= std::numeric_limits<uint16_t>::lowest() &&
+ val <= std::numeric_limits<uint16_t>::max());
+ }
+ case DataType::S16:
+ {
+ const auto val_s16 = static_cast<int16_t>(val);
+ return ((val_s16 == val) && val >= std::numeric_limits<int16_t>::lowest() &&
+ val <= std::numeric_limits<int16_t>::max());
+ }
+ case DataType::U32:
+ {
+ const auto val_d64 = static_cast<double>(val);
+ const auto val_u32 = static_cast<uint32_t>(val);
+ return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits<uint32_t>::lowest() &&
+ val_d64 <= std::numeric_limits<uint32_t>::max());
+ }
+ case DataType::S32:
+ {
+ const auto val_d64 = static_cast<double>(val);
+ const auto val_s32 = static_cast<int32_t>(val);
+ return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits<int32_t>::lowest() &&
+ val_d64 <= std::numeric_limits<int32_t>::max());
+ }
+ case DataType::BFLOAT16:
+ return (val >= bfloat16::lowest() && val <= bfloat16::max());
+ case DataType::F16:
+ return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max());
+ case DataType::F32:
+ return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max());
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported");
+ return false;
+ }
+}
+
+/** Returns the suffix string of CPU kernel implementation names based on the given data type
+ *
+ * @param[in] data_type The data type the CPU kernel implementation uses
+ *
+ * @return the suffix string of CPU kernel implementations
+ */
+inline std::string cpu_impl_dt(const DataType &data_type)
+{
+ std::string ret = "";
+
+ switch (data_type)
+ {
+ case DataType::F32:
+ ret = "fp32";
+ break;
+ case DataType::F16:
+ ret = "fp16";
+ break;
+ case DataType::U8:
+ ret = "u8";
+ break;
+ case DataType::S16:
+ ret = "s16";
+ break;
+ case DataType::S32:
+ ret = "s32";
+ break;
+ case DataType::QASYMM8:
+ ret = "qu8";
+ break;
+ case DataType::QASYMM8_SIGNED:
+ ret = "qs8";
+ break;
+ case DataType::QSYMM16:
+ ret = "qs16";
+ break;
+ case DataType::QSYMM8_PER_CHANNEL:
+ ret = "qp8";
+ break;
+ case DataType::BFLOAT16:
+ ret = "bf16";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported.");
+ }
+
+ return ret;
+}
+
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
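
The integer cases of check_value_range() above all rely on the same cast-and-compare round trip; a standalone sketch of that idiom (the fits() helper is illustrative, not library API):

#include <cstdint>
#include <cstdio>
#include <limits>

// A value fits a narrower type iff the cast round-trips and the value lies
// inside the target's numeric limits.
template <typename TargetT, typename T>
bool fits(T val)
{
    const auto val_t = static_cast<TargetT>(val);
    return (val_t == val) && val >= std::numeric_limits<TargetT>::lowest() &&
           val <= std::numeric_limits<TargetT>::max();
}

int main()
{
    std::printf("%d\n", fits<uint8_t>(255));  // 1: representable as U8
    std::printf("%d\n", fits<uint8_t>(256));  // 0: cast wraps to 0, round trip fails
    std::printf("%d\n", fits<int8_t>(-129));  // 0: below the S8 range
}
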
diff --git a/arm_compute/core/utils/FormatUtils.h b/arm_compute/core/utils/FormatUtils.h
new file mode 100644
index 0000000000..a8e96bd361
--- /dev/null
+++ b/arm_compute/core/utils/FormatUtils.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** The size in bytes of the pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The size in bytes of the pixel format
+ */
+inline size_t pixel_size_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ return 1;
+ case Format::U16:
+ case Format::S16:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::UV88:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 2;
+ case Format::RGB888:
+ return 3;
+ case Format::RGBA8888:
+ return 4;
+ case Format::U32:
+ case Format::S32:
+ case Format::F32:
+ return 4;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ ARM_COMPUTE_ERROR("Undefined pixel size for given format");
+ return 0;
+ }
+}
+
+/** Return the plane index of a given channel given an input format.
+ *
+ * @param[in] format Input format
+ * @param[in] channel Input channel
+ *
+ * @return The plane index of the specific channel of the specific format
+ */
+inline int plane_idx_from_channel(Format format, Channel channel)
+{
+ switch (format)
+ {
+ // Single planar formats have a single plane
+ case Format::U8:
+ case Format::U16:
+ case Format::S16:
+ case Format::U32:
+ case Format::S32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ case Format::UV88:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 0;
+ // Multi planar formats
+ case Format::NV12:
+ case Format::NV21:
+ {
+ // Channel U and V share the same plane of format UV88
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ case Channel::V:
+ return 1;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::IYUV:
+ case Format::YUV444:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the channel index of a given channel given an input format.
+ *
+ * @param[in] format Input format
+ * @param[in] channel Input channel
+ *
+ * @return The channel index of the specific channel of the specific format
+ */
+inline int channel_idx_from_format(Format format, Channel channel)
+{
+ switch (format)
+ {
+ case Format::RGB888:
+ {
+ switch (channel)
+ {
+ case Channel::R:
+ return 0;
+ case Channel::G:
+ return 1;
+ case Channel::B:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::RGBA8888:
+ {
+ switch (channel)
+ {
+ case Channel::R:
+ return 0;
+ case Channel::G:
+ return 1;
+ case Channel::B:
+ return 2;
+ case Channel::A:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::YUYV422:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::UYVY422:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 1;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::NV12:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 1;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::NV21:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 0;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::YUV444:
+ case Format::IYUV:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 0;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the number of planes for a given format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of planes for a given image format.
+ */
+inline size_t num_planes_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ case Format::S16:
+ case Format::U16:
+ case Format::S32:
+ case Format::U32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 1;
+ case Format::NV12:
+ case Format::NV21:
+ return 2;
+ case Format::IYUV:
+ case Format::YUV444:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the number of channels for a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of channels for a given image format.
+ */
+inline size_t num_channels_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ case Format::U16:
+ case Format::S16:
+ case Format::U32:
+ case Format::S32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ return 1;
+ // Because the U and V channels are subsampled
+ // these formats appear like having only 2 channels:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 2;
+ case Format::UV88:
+ return 2;
+ case Format::RGB888:
+ return 3;
+ case Format::RGBA8888:
+ return 4;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ return 0;
+ }
+}
+
+/** Convert a tensor format into a string.
+ *
+ * @param[in] format @ref Format to be translated to string.
+ *
+ * @return The string describing the format.
+ */
+const std::string &string_from_format(Format format);
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H */
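
A hypothetical lookup using the declarations above, showing how the NV12/NV21 tables differ only in the ordering of U and V within the shared UV plane:

#include "arm_compute/core/utils/FormatUtils.h"

#include <cstdio>

int main()
{
    using namespace arm_compute;
    std::printf("NV12: plane %d, channel %d\n",
                plane_idx_from_channel(Format::NV12, Channel::V),   // plane 1 (interleaved UV)
                channel_idx_from_format(Format::NV12, Channel::V)); // index 1 (U comes first)
    std::printf("NV21: plane %d, channel %d\n",
                plane_idx_from_channel(Format::NV21, Channel::V),   // plane 1 (interleaved VU)
                channel_idx_from_format(Format::NV21, Channel::V)); // index 0 (V comes first)
}
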
diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/utils/InterpolationPolicyUtils.h
index 3aff677385..8d4ae4321c 100644
--- a/arm_compute/core/NEON/NEFixedPoint.h
+++ b/arm_compute/core/utils/InterpolationPolicyUtils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,21 +21,21 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEFIXEDPOINT_H
-#define ARM_COMPUTE_NEFIXEDPOINT_H
+#ifndef ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H
-#include <arm_neon.h>
+#include "arm_compute/core/Types.h"
+
+#include <string>
namespace arm_compute
{
-/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
+/** Translates a given interpolation policy to a string.
*
- * @param[in] a Float input vector
- * @param[in] b Float input vector
+ * @param[in] policy @ref InterpolationPolicy to be translated to string.
*
- * @return The lane-by-lane maximum -> float32x4x2
+ * @return The string describing the interpolation policy.
*/
-float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
+const std::string &string_from_interpolation_policy(InterpolationPolicy policy);
} // namespace arm_compute
-#include "arm_compute/core/NEON/NEFixedPoint.inl"
-#endif /* ARM_COMPUTE_NEFIXEDPOINT_H */
\ No newline at end of file
+#endif /*ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H */
diff --git a/arm_compute/core/GLES_COMPUTE/GCHelpers.h b/arm_compute/core/utils/StringUtils.h
index b1a9ab32be..c13cbaa334 100644
--- a/arm_compute/core/GLES_COMPUTE/GCHelpers.h
+++ b/arm_compute/core/utils/StringUtils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,38 +21,45 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_GCHELPERS_H
-#define ARM_COMPUTE_GCHELPERS_H
+#ifndef ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/Helpers.h"
-
-#include <set>
#include <string>
+#include <vector>
namespace arm_compute
{
-// Forward declarations
-class GCCoreRuntimeContext;
+/** Lower a given string.
+ *
+ * @param[in] val Given string to lower.
+ *
+ * @return The lowered string
+ */
+std::string lower_string(const std::string &val);
-/** Max vector width of an GLES vector */
-static constexpr unsigned int max_gc_vector_width = 16;
+/** Raise a given string to upper case
+ *
+ * @param[in] val Given string to convert to upper case.
+ *
+ * @return The upper case string
+ */
+std::string upper_string(const std::string &val);
-/** Helper function to get the GPU target from GLES using GL_RENDERER enum
+/** Create a string with the float in full precision.
*
- * @return the GPU target
+ * @param val Floating point value
+ *
+ * @return String with the floating point value.
*/
-GPUTarget get_target_from_device();
-/** Creates an GLES kernel
+std::string float_to_string_with_full_precision(float val);
+
+/** Join a sequence of strings with separator @p sep
*
- * @param[in] ctx A context to be used to create the GLES kernel.
- * @param[in] kernel_name The kernel name.
- * @param[in] build_opts The build options to be used for the GLES kernel compilation.
+ * @param[in] strings Strings to join
+ * @param[in] sep Separator to join consecutive strings in the sequence
*
- * @return A GLES kernel
+ * @return std::string
*/
-GCKernel create_opengl_kernel(GCCoreRuntimeContext *ctx, const std::string &kernel_name, const std::set<std::string> &build_opts);
+std::string join(const std::vector<std::string> strings, const std::string &sep);
} // namespace arm_compute
-#endif /* ARM_COMPUTE_GCHELPERS_H */
+#endif /*ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H */
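
A hypothetical usage sketch of the string helpers declared above, e.g. for assembling kernel names and build options (the option strings are made up):

#include "arm_compute/core/utils/StringUtils.h"

#include <cstdio>

int main()
{
    using namespace arm_compute;
    const std::string name = lower_string("NHWC");                   // "nhwc"
    const std::string opts = join({"-DWIDTH=4", "-DHEIGHT=2"}, " "); // "-DWIDTH=4 -DHEIGHT=2"
    std::printf("%s | %s | %s\n", name.c_str(), opts.c_str(),
                float_to_string_with_full_precision(0.5f).c_str());
}
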
diff --git a/arm_compute/core/utils/misc/CRTP.h b/arm_compute/core/utils/helpers/AdjustVecSize.h
index 037c69ab1d..842e3b57d6 100644
--- a/arm_compute/core/utils/misc/CRTP.h
+++ b/arm_compute/core/utils/helpers/AdjustVecSize.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,35 +21,35 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_MISC_CRTP_H
-#define ARM_COMPUTE_MISC_CRTP_H
+#ifndef ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H
+#define ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H
+
+#include "arm_compute/core/Error.h"
namespace arm_compute
{
-namespace misc
-{
-/** Curiously recurring template pattern Interface */
-template <typename T, template <typename> class Type>
-struct CRTP
+/** Returns the vector size, rounded down to the closest valid vector size if it exceeds the input's first dimension
+ *
+ * @param[in] vec_size Vector size to be adjusted
+ * @param[in] dim0     Size of the first dimension
+ *
+ * @return The number of elements processed along the X axis per thread
+ */
+inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
{
-public:
- /** Exact type */
- using ExactType = T;
+ ARM_COMPUTE_ERROR_ON(vec_size > 16);
-protected:
- const T &impl() const
+ if ((vec_size >= dim0) && (dim0 == 3))
{
- return static_cast<const T &>(*this);
+ return dim0;
}
- T &impl()
+
+ while (vec_size > dim0)
{
- return static_cast<T &>(*this);
+ vec_size >>= 1;
}
-private:
- CRTP() = default;
- friend Type<T>;
-};
-} // namespace misc
+ return vec_size;
+}
} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_CRTP_H */
+#endif /* ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H */
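Editor's note: a few worked values for adjust_vec_size(), traced from the implementation above:

    // adjust_vec_size(16, 10) -> 8 (16 is halved until it no longer exceeds dim0)
    // adjust_vec_size(8, 3)   -> 3 (dim0 == 3 is accepted as a valid vector size)
    // adjust_vec_size(4, 7)   -> 4 (already fits, returned unchanged)
    const unsigned int vec = adjust_vec_size(16, src->dimension(0)); // src: a hypothetical ITensorInfo *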
diff --git a/arm_compute/core/utils/helpers/bit_ops.h b/arm_compute/core/utils/helpers/bit_ops.h
deleted file mode 100644
index 6dbca179e7..0000000000
--- a/arm_compute/core/utils/helpers/bit_ops.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H
-#define ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H
-
-#include "arm_compute/core/utils/misc/Requires.h"
-
-#include <type_traits>
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace bit_ops
-{
-/** Checks if the idx-th bit is set in an integral type
- *
- * @param[in] v Integral input
- * @param[in] idx Index of the bit to check
- *
- * @return True if the idx-th bit is set else false
- */
-template <typename T, REQUIRES_TA(std::is_integral<T>::value)>
-bool is_bit_set(T v, unsigned int idx)
-{
- return (v & 1 << idx) != 0;
-}
-} // namespace bit_ops
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H */
diff --git a/arm_compute/core/utils/helpers/fft.h b/arm_compute/core/utils/helpers/fft.h
deleted file mode 100644
index b22bece73f..0000000000
--- a/arm_compute/core/utils/helpers/fft.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_FFT_H
-#define ARM_COMPUTE_UTILS_HELPERS_FFT_H
-
-#include <set>
-#include <vector>
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace fft
-{
-/** Decompose a given 1D input size using the provided supported factors.
- *
- * @param[in] N Input size to be decomposed.
- * @param[in] supported_factors Supported factors that can be used for decomposition.
- *
- * @return A vector with the stages of the decomposition. Will be empty if decomposition failed.
- */
-std::vector<unsigned int> decompose_stages(unsigned int N, const std::set<unsigned int> &supported_factors);
-/** Calculate digit reverse index vector given fft size and the decomposed stages
- *
- * @param N Input size to calculate digit reverse for
- * @param fft_stages A vector with the FFT decomposed stages
- *
- * @return A vector with the digit reverse indices. Will be empty if it failed.
- */
-std::vector<unsigned int> digit_reverse_indices(unsigned int N, const std::vector<unsigned int> &fft_stages);
-} // namespace fft
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_FFT_H */
diff --git a/arm_compute/core/utils/helpers/float_ops.h b/arm_compute/core/utils/helpers/float_ops.h
deleted file mode 100644
index fceee2e3fe..0000000000
--- a/arm_compute/core/utils/helpers/float_ops.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H
-#define ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace float_ops
-{
-union RawFloat
-{
- /** Constructor
- *
- * @param[in] val Floating-point value
- */
- explicit RawFloat(float val)
- : f32(val)
- {
- }
- /** Extract sign of floating point number
- *
- * @return Sign of floating point number
- */
- int32_t sign() const
- {
- return i32 >> 31;
- }
- /** Extract exponent of floating point number
- *
- * @return Exponent of floating point number
- */
- int32_t exponent() const
- {
- return (i32 >> 23) & 0xFF;
- }
- /** Extract mantissa of floating point number
- *
- * @return Mantissa of floating point number
- */
- int32_t mantissa() const
- {
- return i32 & 0x007FFFFF;
- }
-
- int32_t i32;
- float f32;
-};
-
-/** Checks if two floating point numbers are equal given an allowed number of ULPs
- *
- * @param[in] a First number to compare
- * @param[in] b Second number to compare
- * @param[in] max_allowed_ulps (Optional) Number of allowed ULPs
- *
- * @return True if number is close else false
- */
-inline bool is_equal_ulps(float a, float b, int max_allowed_ulps = 0)
-{
- RawFloat ra(a);
- RawFloat rb(b);
-
- // Check ULP distance
- const int ulps = std::abs(ra.i32 - rb.i32);
- return ulps <= max_allowed_ulps;
-}
-
-/** Checks if the input floating point number is 1.0f checking if the difference is within a range defined with epsilon
- *
- * @param[in] a Input floating point number
- * @param[in] epsilon (Optional) Epsilon used to define the error bounds
- *
- * @return True if number is close to 1.0f
- */
-inline bool is_one(float a, float epsilon = 0.00001f)
-{
- return std::abs(1.0f - a) <= epsilon;
-}
-
-/** Checks if the input floating point number is 0.0f checking if the difference is within a range defined with epsilon
- *
- * @param[in] a Input floating point number
- * @param[in] epsilon (Optional) Epsilon used to define the error bounds
- *
- * @return True if number is close to 0.0f
- */
-inline bool is_zero(float a, float epsilon = 0.00001f)
-{
- return std::abs(0.0f - a) <= epsilon;
-}
-} // namespace float_ops
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H */
diff --git a/arm_compute/core/utils/helpers/tensor_info.h b/arm_compute/core/utils/helpers/tensor_info.h
deleted file mode 100644
index da24e82f5a..0000000000
--- a/arm_compute/core/utils/helpers/tensor_info.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H
-#define ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H
-
-#include "arm_compute/core/ITensorInfo.h"
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace tensor_info
-{
-/** Checks if the quantization info of given tensors are different
- *
- * @param tensor_info_1 Tensor info of the first tensor
- * @param tensor_info_2 Tensor info of the second tensor
- * @param tensor_infos Tensor infos of the rest tensors
- *
- * @return True if tensors have mismatching quantization info else false.
- */
-template <typename... Ts>
-inline bool tensors_have_different_quantization_info(const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
-{
- const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info();
-
- const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } };
- return std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->quantization_info() != first_quantization_info;
- });
-}
-} // namespace tensor_info
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H */
diff --git a/arm_compute/core/utils/helpers/tensor_transform.h b/arm_compute/core/utils/helpers/tensor_transform.h
index 7e912a6f0a..7a61fa192a 100644
--- a/arm_compute/core/utils/helpers/tensor_transform.h
+++ b/arm_compute/core/utils/helpers/tensor_transform.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,7 +52,8 @@ int calculate_stride_on_index(int index, Coordinates strides);
*
* @return Absolute start position of a given index
*/
-int calculate_start_on_index(TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask);
+int calculate_start_on_index(
+ TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask);
/** Returns the absolute end position of a given index for a strided slice operation
*
@@ -68,8 +69,13 @@ int calculate_start_on_index(TensorShape input_shape, int index, Coordinates sta
*
* @return Absolute end position of a given index
*/
-int calculate_end_on_index(TensorShape input_shape, int index, int start_on_index, Coordinates ends, Coordinates strides,
- int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+int calculate_end_on_index(TensorShape input_shape,
+ int index,
+ int start_on_index,
+ Coordinates ends,
+ Coordinates strides,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Calculate start, end and stride coordinates for a strided slice
*
@@ -87,8 +93,12 @@ int calculate_end_on_index(TensorShape input_shape, int index, int start_on_inde
* @return A tuple with <Start,End,Strides>
*/
std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords(TensorShape input_shape,
- Coordinates starts, Coordinates ends, Coordinates strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ Coordinates starts,
+ Coordinates ends,
+ Coordinates strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Computes output shape of strided slice
*
@@ -109,9 +119,14 @@ std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords
*
* @return The output tensor shape
*/
-TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends, Coordinates strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0,
- bool return_unshrinked = false);
+TensorShape compute_strided_slice_output_shape(TensorShape input_shape,
+ Coordinates starts,
+ Coordinates ends,
+ Coordinates strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0,
+ bool return_unshrinked = false);
/** Constructs end mask in case we want to perform a slice operation using the strided slice interface
*
@@ -122,7 +137,7 @@ TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordina
* @return End mask
*/
int32_t construct_slice_end_mask(Coordinates ends);
-} // namespace tensor_tranform
+} // namespace tensor_transform
} // namespace helpers
} // namespace arm_compute
#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_TRANSFORM_H */
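Editor's note: a hedged sketch of how these helpers compose for a plain slice with unit strides (values are illustrative; mask semantics are assumed to follow the usual strided-slice convention):

    using namespace arm_compute;
    // Slice columns [2, 5) of an 8x6 tensor.
    const TensorShape in_shape(8U, 6U);
    const Coordinates starts(2, 0);
    const Coordinates ends(5, 6);
    const Coordinates strides(1, 1);
    const TensorShape out_shape =
        helpers::tensor_transform::compute_strided_slice_output_shape(in_shape, starts, ends, strides);
    // Expected: out_shape == (3, 6), since (5 - 2) / 1 = 3 along dimension 0.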
diff --git a/arm_compute/core/utils/io/FileHandler.h b/arm_compute/core/utils/io/FileHandler.h
index ebc2ef06c1..615651d5b1 100644
--- a/arm_compute/core/utils/io/FileHandler.h
+++ b/arm_compute/core/utils/io/FileHandler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/arm_compute/core/utils/logging/FilePrinter.h b/arm_compute/core/utils/logging/FilePrinter.h
index 73a5421ed4..a865aadddb 100644
--- a/arm_compute/core/utils/logging/FilePrinter.h
+++ b/arm_compute/core/utils/logging/FilePrinter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_LOGGING_FILE_PRINTER_H
#define ARM_COMPUTE_LOGGING_FILE_PRINTER_H
-#include "arm_compute/core/utils/logging/IPrinter.h"
-
#include "arm_compute/core/utils/io/FileHandler.h"
+#include "arm_compute/core/utils/logging/IPrinter.h"
namespace arm_compute
{
diff --git a/arm_compute/core/utils/logging/Helpers.h b/arm_compute/core/utils/logging/Helpers.h
index 341f944ddc..c3c2f0f0b8 100644
--- a/arm_compute/core/utils/logging/Helpers.h
+++ b/arm_compute/core/utils/logging/Helpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#define ARM_COMPUTE_LOGGING_HELPERS_H
#include "arm_compute/core/utils/logging/Types.h"
-#include "support/MemorySupport.h"
+
#include "support/ToolchainSupport.h"
#include <cstddef>
@@ -46,10 +46,10 @@ namespace logging
* @return The formatted string
*/
template <typename... Ts>
-inline std::string string_with_format(const std::string &fmt, Ts &&... args)
+inline std::string string_with_format(const std::string &fmt, Ts &&...args)
{
size_t size = support::cpp11::snprintf(nullptr, 0, fmt.c_str(), args...) + 1;
- auto char_str = support::cpp14::make_unique<char[]>(size);
+ auto char_str = std::make_unique<char[]>(size);
support::cpp11::snprintf(char_str.get(), size, fmt.c_str(), args...);
return std::string(char_str.get(), char_str.get() + size - 1);
}
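Editor's note: a short usage sketch of string_with_format(), which formats like snprintf but returns a std::string of exactly the right size:

    const std::string msg =
        arm_compute::logging::string_with_format("Tuning %s took %d ms", "CLGEMM", 12);
    // msg == "Tuning CLGEMM took 12 ms"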
diff --git a/arm_compute/core/utils/logging/IPrinter.h b/arm_compute/core/utils/logging/IPrinter.h
index b6ede5853a..7fde4d9302 100644
--- a/arm_compute/core/utils/logging/IPrinter.h
+++ b/arm_compute/core/utils/logging/IPrinter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,8 +35,7 @@ class Printer
{
public:
/** Default Constructor */
- Printer() noexcept
- : _mtx()
+ Printer() noexcept : _mtx()
{
}
/** Prevent instances of this class from being copied */
diff --git a/arm_compute/core/utils/logging/LogMsgDecorators.h b/arm_compute/core/utils/logging/LogMsgDecorators.h
index 08abcb4519..66a8180e21 100644
--- a/arm_compute/core/utils/logging/LogMsgDecorators.h
+++ b/arm_compute/core/utils/logging/LogMsgDecorators.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,8 +63,7 @@ public:
*
 * @param str String to append
*/
- StringDecorator(const std::string &str)
- : _str(str)
+ StringDecorator(const std::string &str) : _str(str)
{
_str = angle_wrap_value(str);
}
@@ -103,7 +102,7 @@ private:
auto time = std::chrono::system_clock::to_time_t(now);
// TODO: use put_time for gcc > 4.9
- char buf[100] = { 0 };
+ char buf[100] = {0};
std::strftime(buf, sizeof(buf), "%d-%m-%Y %I:%M:%S", std::localtime(&time));
return buf;
}
diff --git a/arm_compute/core/utils/logging/Logger.h b/arm_compute/core/utils/logging/Logger.h
index 2bd467ae2b..608db39138 100644
--- a/arm_compute/core/utils/logging/Logger.h
+++ b/arm_compute/core/utils/logging/Logger.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -88,7 +88,7 @@ public:
* @param[in] args Message arguments
*/
template <typename... Ts>
- void log(LogLevel log_level, const std::string &fmt, Ts &&... args);
+ void log(LogLevel log_level, const std::string &fmt, Ts &&...args);
/** Sets log level of the logger
*
* @warning Not thread-safe
@@ -159,11 +159,11 @@ private:
};
template <typename... Ts>
-inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&... args)
+inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&...args)
{
// Return if message shouldn't be logged
// i.e. if log level does not match the logger's
- if(!is_loggable(log_level))
+ if (!is_loggable(log_level))
{
return;
}
diff --git a/arm_compute/core/utils/logging/LoggerRegistry.h b/arm_compute/core/utils/logging/LoggerRegistry.h
index c1a182c1ae..4e52a10935 100644
--- a/arm_compute/core/utils/logging/LoggerRegistry.h
+++ b/arm_compute/core/utils/logging/LoggerRegistry.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/utils/logging/Logger.h"
#include "arm_compute/core/utils/logging/Printers.h"
#include "arm_compute/core/utils/logging/Types.h"
+
#include "support/Mutex.h"
#include <memory>
@@ -54,8 +55,9 @@ public:
* @param[in] log_level Logger's log level. Defaults to INFO
* @param[in] printers Printers to attach to the system loggers. Defaults with a @ref StdPrinter.
*/
- void create_logger(const std::string &name, LogLevel log_level = LogLevel::INFO,
- const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() });
+ void create_logger(const std::string &name,
+ LogLevel log_level = LogLevel::INFO,
+ const std::vector<std::shared_ptr<Printer>> &printers = {std::make_shared<StdPrinter>()});
/** Remove a logger
*
* @param name Logger's name
@@ -74,16 +76,17 @@ public:
* @param[in] printers (Optional) Printers to attach to the system loggers. Defaults with a @ref StdPrinter.
*/
void create_reserved_loggers(LogLevel log_level = LogLevel::INFO,
- const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() });
+ const std::vector<std::shared_ptr<Printer>> &printers = {
+ std::make_shared<StdPrinter>()});
private:
/** Default constructor */
LoggerRegistry();
private:
- arm_compute::Mutex _mtx;
+ arm_compute::Mutex _mtx;
std::unordered_map<std::string, std::shared_ptr<Logger>> _loggers;
- static std::set<std::string> _reserved_loggers;
+ static std::set<std::string> _reserved_loggers;
};
} // namespace logging
} // namespace arm_compute
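Editor's note: a minimal sketch of driving the registry (logger name is hypothetical; the calls mirror the API above and the logging macros defined later in this patch):

    using namespace arm_compute::logging;
    // Create a named logger with an INFO threshold and the default StdPrinter.
    LoggerRegistry::get().create_logger("EXAMPLE", LogLevel::INFO);
    // The macros look the logger up by name before emitting (requires ARM_COMPUTE_LOGGING_ENABLED).
    ARM_COMPUTE_LOG_MSG("EXAMPLE", LogLevel::INFO, "configuration complete");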
diff --git a/arm_compute/core/utils/logging/Macros.h b/arm_compute/core/utils/logging/Macros.h
index e4d9734792..4d5aa5fe2c 100644
--- a/arm_compute/core/utils/logging/Macros.h
+++ b/arm_compute/core/utils/logging/Macros.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,44 +30,71 @@
#ifdef ARM_COMPUTE_LOGGING_ENABLED
+#ifdef __GNUC__
+inline std::string signature_name(const std::string &pretty_func)
+{
+ const auto scope_op = pretty_func.find("::");
+ const auto begin = pretty_func.substr(0, scope_op).rfind(" ") + 1;
+ const auto end = pretty_func.rfind("(") - begin;
+
+ return pretty_func.substr(begin, end) + "()";
+}
+#define ARM_COMPUTE_SIGNATURE_NAME signature_name(__PRETTY_FUNCTION__)
+#else /* __GNUC__ */
+#define ARM_COMPUTE_SIGNATURE_NAME (__func__)
+#endif /* __GNUC__ */
+
#define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg) \
do \
{ \
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
- if(__logger != nullptr) \
+ if (__logger != nullptr) \
{ \
__logger->log(log_level, msg); \
} \
- } while(false)
+ } while (false)
+
+#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) \
+ do \
+ { \
+ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
+ if (__logger != nullptr) \
+ { \
+ std::ostringstream s; \
+ s << ARM_COMPUTE_SIGNATURE_NAME << " : " << msg; \
+ __logger->log(log_level, s.str()); \
+ } \
+ } while (false)
#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) \
do \
{ \
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
- if(__logger != nullptr) \
+ if (__logger != nullptr) \
{ \
size_t size = ::snprintf(nullptr, 0, fmt, __VA_ARGS__) + 1; \
- auto char_str = support::cpp14::make_unique<char[]>(size); \
- ::snprintf(char_str.get(), size, #fmt, __VA_ARGS__); \
+ auto char_str = std::make_unique<char[]>(size); \
+ ::snprintf(char_str.get(), size, fmt, __VA_ARGS__); \
__logger->log(log_level, std::string(char_str.get(), char_str.get() + size - 1)); \
} \
- } while(false)
+ } while (false)
#define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) \
do \
{ \
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
- if(__logger != nullptr) \
+ if (__logger != nullptr) \
{ \
std::ostringstream s; \
s << stream; \
__logger->log(log_level, s.str()); \
} \
- } while(false)
+ } while (false)
#else /* ARM_COMPUTE_LOGGING_ENABLED */
#define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg)
+#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg)
#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...)
#define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream)
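Editor's note: to make the new ARM_COMPUTE_SIGNATURE_NAME concrete, signature_name() keeps everything between the return type and the parameter list of __PRETTY_FUNCTION__ (class name below is illustrative):

    // signature_name("void arm_compute::CLScale::configure(ICLTensor *)")
    //   -> "arm_compute::CLScale::configure()"
    // ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME therefore prefixes each message with the qualified function name.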
diff --git a/arm_compute/core/utils/logging/Printers.h b/arm_compute/core/utils/logging/Printers.h
index e09880cc53..80493e7052 100644
--- a/arm_compute/core/utils/logging/Printers.h
+++ b/arm_compute/core/utils/logging/Printers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/arm_compute/core/utils/logging/StdPrinter.h b/arm_compute/core/utils/logging/StdPrinter.h
index ea41ce2599..eb0e78ee84 100644
--- a/arm_compute/core/utils/logging/StdPrinter.h
+++ b/arm_compute/core/utils/logging/StdPrinter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/arm_compute/core/utils/logging/Types.h b/arm_compute/core/utils/logging/Types.h
index 838adf95b4..64c567b984 100644
--- a/arm_compute/core/utils/logging/Types.h
+++ b/arm_compute/core/utils/logging/Types.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,8 +44,7 @@ enum class LogLevel
struct LogMsg
{
/** Default constructor */
- LogMsg()
- : raw_(), log_level_(LogLevel::OFF)
+ LogMsg() : raw_(), log_level_(LogLevel::OFF)
{
}
/** Construct a log message
@@ -53,8 +52,7 @@ struct LogMsg
* @param[in] msg Message to log.
* @param[in] log_level Logging level. Default: OFF
*/
- LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF)
- : raw_(msg), log_level_(log_level)
+ LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) : raw_(msg), log_level_(log_level)
{
}
diff --git a/arm_compute/core/utils/math/Math.h b/arm_compute/core/utils/math/Math.h
new file mode 100644
index 0000000000..e70337ba0f
--- /dev/null
+++ b/arm_compute/core/utils/math/Math.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017-2018, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_MATH_H
+#define ARM_COMPUTE_UTILS_MATH_H
+
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** Calculate the rounded up quotient of val / m.
+ *
+ * @param[in] val Value to divide and round up.
+ * @param[in] m Value to divide by.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
+{
+ return (val + m - 1) / m;
+}
+
+/** Computes the smallest number larger than or equal to @p value that is a multiple of @p divisor.
+ *
+ * @param[in] value Lower bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
+{
+ ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+ return DIV_CEIL(value, divisor) * divisor;
+}
+
+/** Computes the largest number smaller than or equal to @p value that is a multiple of @p divisor.
+ *
+ * @param[in] value Upper bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
+{
+ ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+ return (value / divisor) * divisor;
+}
+
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_UTILS_MATH_H */
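Editor's note: worked values for the new math helpers, traced from the definitions above:

    static_assert(arm_compute::DIV_CEIL(10, 3) == 4, "ceil(10 / 3) == 4");
    // ceil_to_multiple(10, 4)  -> 12 (smallest multiple of 4 that is >= 10)
    // floor_to_multiple(10, 4) -> 8  (largest multiple of 4 that is <= 10)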
diff --git a/arm_compute/core/utils/math/SafeOps.h b/arm_compute/core/utils/math/SafeOps.h
index 41bbb12e70..ef8bcf7e14 100644
--- a/arm_compute/core/utils/math/SafeOps.h
+++ b/arm_compute/core/utils/math/SafeOps.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,10 @@
#define ARM_COMPUTE_UTILS_MATH_SAFE_OPS
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/utils/misc/Requires.h"
+
+#include "support/AclRequires.h"
+
+#include <limits>
namespace arm_compute
{
@@ -44,16 +47,16 @@ namespace math
*
* @return The addition result
*/
-template <typename T, REQUIRES_TA(std::is_integral<T>::value)>
+template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
T safe_integer_add(T val_a, T val_b)
{
T result = 0;
- if((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b))
+ if ((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b))
{
result = std::numeric_limits<T>::max();
}
- else if((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b))
+ else if ((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b))
{
result = std::numeric_limits<T>::min();
}
@@ -76,16 +79,16 @@ T safe_integer_add(T val_a, T val_b)
*
* @return The subtraction result
*/
-template <typename T, REQUIRES_TA(std::is_integral<T>::value)>
+template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
T safe_integer_sub(T val_a, T val_b)
{
T result = 0;
- if((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b))
+ if ((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b))
{
result = std::numeric_limits<T>::max();
}
- else if((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b))
+ else if ((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b))
{
result = std::numeric_limits<T>::min();
}
@@ -108,18 +111,18 @@ T safe_integer_sub(T val_a, T val_b)
*
* @return The multiplication result
*/
-template <typename T, REQUIRES_TA(std::is_integral<T>::value)>
+template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
T safe_integer_mul(T val_a, T val_b)
{
T result = 0;
- if(val_a > 0)
+ if (val_a > 0)
{
- if((val_b > 0) && (val_a > (std::numeric_limits<T>::max() / val_b)))
+ if ((val_b > 0) && (val_a > (std::numeric_limits<T>::max() / val_b)))
{
result = std::numeric_limits<T>::max();
}
- else if(val_b < (std::numeric_limits<T>::min() / val_a))
+ else if (val_b < (std::numeric_limits<T>::min() / val_a))
{
result = std::numeric_limits<T>::min();
}
@@ -130,11 +133,11 @@ T safe_integer_mul(T val_a, T val_b)
}
else
{
- if((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b)))
+ if ((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b)))
{
result = std::numeric_limits<T>::max();
}
- else if((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a)))
+ else if ((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a)))
{
result = std::numeric_limits<T>::min();
}
@@ -158,12 +161,12 @@ T safe_integer_mul(T val_a, T val_b)
*
* @return The quotient
*/
-template <typename T, REQUIRES_TA(std::is_integral<T>::value)>
+template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
T safe_integer_div(T val_a, T val_b)
{
T result = 0;
- if((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1)))
+ if ((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1)))
{
result = std::numeric_limits<T>::min();
}
@@ -174,7 +177,7 @@ T safe_integer_div(T val_a, T val_b)
return result;
}
-} // namespace cast
+} // namespace math
} // namespace utils
} // namespace arm_compute
#endif /* ARM_COMPUTE_UTILS_MATH_SAFE_OPS */
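Editor's note: the safe_* helpers saturate rather than overflow; a small sketch of the observable behaviour:

    #include "arm_compute/core/utils/math/SafeOps.h"
    #include <cstdint>
    #include <limits>

    using namespace arm_compute::utils::math;
    const int32_t a = safe_integer_add<int32_t>(std::numeric_limits<int32_t>::max(), 1); // saturates to INT32_MAX
    const int32_t d = safe_integer_div<int32_t>(7, 0); // division by zero yields INT32_MIN instead of UB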
diff --git a/arm_compute/core/utils/misc/Cast.h b/arm_compute/core/utils/misc/Cast.h
deleted file mode 100644
index fc6246aace..0000000000
--- a/arm_compute/core/utils/misc/Cast.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_CAST_H
-#define ARM_COMPUTE_MISC_CAST_H
-
-#include "arm_compute/core/Error.h"
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace cast
-{
-/** Polymorphic cast between two types
- *
- * @warning Will throw an exception if cast cannot take place
- *
- * @tparam Target Target to cast type
- * @tparam Source Source from cast type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source>
-inline Target polymorphic_cast(Source *v)
-{
- if(dynamic_cast<Target>(v) == nullptr)
- {
- ARM_COMPUTE_THROW(std::bad_cast());
- }
- return static_cast<Target>(v);
-}
-
-/** Polymorphic down cast between two types
- *
- * @warning Will assert if cannot take place
- *
- * @tparam Target Target to cast type
- * @tparam Source Source from cast type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source>
-inline Target polymorphic_downcast(Source *v)
-{
- ARM_COMPUTE_ERROR_ON(dynamic_cast<Target>(v) != static_cast<Target>(v));
- return static_cast<Target>(v);
-}
-
-/** Polymorphic cast between two unique pointer types
- *
- * @warning Will throw an exception if cast cannot take place
- *
- * @tparam Target Target to cast type
- * @tparam Source Source from cast type
- * @tparam Deleter Deleter function type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source, typename Deleter>
-std::unique_ptr<Target, Deleter> polymorphic_cast_unique_ptr(std::unique_ptr<Source, Deleter> &&v)
-{
- if(dynamic_cast<Target *>(v.get()) == nullptr)
- {
- ARM_COMPUTE_THROW(std::bad_cast());
- }
- auto r = static_cast<Target *>(v.release());
- return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter()));
-}
-
-/** Polymorphic down cast between two unique pointer types
- *
- * @warning Will assert if cannot take place
- *
- * @tparam Target Target to cast type
- * @tparam Source Source from cast type
- * @tparam Deleter Deleter function type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source, typename Deleter>
-std::unique_ptr<Target, Deleter> polymorphic_downcast_unique_ptr(std::unique_ptr<Source, Deleter> &&v)
-{
- ARM_COMPUTE_ERROR_ON(dynamic_cast<Target *>(v.get()) != static_cast<Target *>(v.get()));
- auto r = static_cast<Target *>(v.release());
- return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter()));
-}
-} // namespace cast
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_CAST_H */
diff --git a/arm_compute/core/utils/misc/ICloneable.h b/arm_compute/core/utils/misc/ICloneable.h
deleted file mode 100644
index 064f408201..0000000000
--- a/arm_compute/core/utils/misc/ICloneable.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_ICLONEABLE_H
-#define ARM_COMPUTE_MISC_ICLONEABLE_H
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace misc
-{
-/** Clonable Interface */
-template <class T>
-class ICloneable
-{
-public:
- /** Default virtual desctructor */
- virtual ~ICloneable() = default;
- /** Provide a clone of the current object of class T
- *
- * @return Clone object of class T
- */
- virtual std::unique_ptr<T> clone() const = 0;
-};
-} // namespace misc
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_ICLONEABLE_H */
diff --git a/arm_compute/core/utils/misc/InfoHelpers.h b/arm_compute/core/utils/misc/InfoHelpers.h
index c6ee7c9031..1d1b4ea8d7 100644
--- a/arm_compute/core/utils/misc/InfoHelpers.h
+++ b/arm_compute/core/utils/misc/InfoHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,10 +53,12 @@ inline bool is_relu(ActivationLayerInfo activation_info)
*/
inline bool is_relu6(ActivationLayerInfo activation_info)
{
- const bool is_lu_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
- && activation_info.a() == 6.f && activation_info.b() == 0.f;
- const bool is_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
- && activation_info.a() == 6.f;
+ const bool is_lu_bounded_relu =
+ activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU &&
+ activation_info.a() == 6.f && activation_info.b() == 0.f;
+ const bool is_bounded_relu =
+ activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&
+ activation_info.a() == 6.f;
return activation_info.enabled() && (is_lu_bounded_relu || is_bounded_relu);
}
@@ -68,50 +70,52 @@ inline bool is_relu6(ActivationLayerInfo activation_info)
*
*/
template <typename T>
-inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params,
- LSTMParams<ITensorInfo> *lstm_params_info)
+inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, LSTMParams<ITensorInfo> *lstm_params_info)
{
- if(lstm_params.has_peephole_opt())
+ if (lstm_params.has_peephole_opt())
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
- lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), lstm_params.cell_to_output_weights()->info());
+ lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(),
+ lstm_params.cell_to_output_weights()->info());
}
- if(lstm_params.has_projection())
+ if (lstm_params.has_projection())
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.projection_weights());
- lstm_params_info->set_projection_params(lstm_params.projection_weights()->info(),
- lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr);
+ lstm_params_info->set_projection_params(
+ lstm_params.projection_weights()->info(),
+ lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr);
}
- if(!lstm_params.has_cifg_opt())
+ if (!lstm_params.has_cifg_opt())
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias());
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(),
+ lstm_params.input_gate_bias());
- const ITensorInfo *cell_to_input_weights_info = (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr;
- lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(),
- cell_to_input_weights_info, lstm_params.input_gate_bias()->info());
+ ITensorInfo *cell_to_input_weights_info =
+ (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr;
+ lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(),
+ lstm_params.recurrent_to_input_weights()->info(), cell_to_input_weights_info,
+ lstm_params.input_gate_bias()->info());
}
- if(lstm_params.use_layer_norm())
+ if (lstm_params.use_layer_norm())
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(),
- lstm_params.output_layer_norm_weights(),
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), lstm_params.output_layer_norm_weights(),
lstm_params.cell_layer_norm_weights());
- if(!lstm_params.has_cifg_opt())
+ if (!lstm_params.has_cifg_opt())
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights());
}
- const ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info();
- const ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info();
- const ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info();
- const ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info();
+ ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info();
+ ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info();
+ ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info();
+ ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info();
lstm_params_info->set_layer_normalization_params(input_info, forget_info, cell_info, output_info);
}
- lstm_params_info->set_matmul_scale_params(lstm_params.input_intermediate_scale(),
- lstm_params.forget_intermediate_scale(),
- lstm_params.cell_intermediate_scale(),
- lstm_params.output_intermediate_scale());
+ lstm_params_info->set_matmul_scale_params(
+ lstm_params.input_intermediate_scale(), lstm_params.forget_intermediate_scale(),
+ lstm_params.cell_intermediate_scale(), lstm_params.output_intermediate_scale());
lstm_params_info->set_hidden_state_params(lstm_params.hidden_state_zero(), lstm_params.hidden_state_scale());
}
diff --git a/arm_compute/core/utils/misc/Iterable.h b/arm_compute/core/utils/misc/Iterable.h
deleted file mode 100644
index 829c4b44a8..0000000000
--- a/arm_compute/core/utils/misc/Iterable.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_ITERABLE_H
-#define ARM_COMPUTE_MISC_ITERABLE_H
-
-#include <iterator>
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace iterable
-{
-/** Reverse range iterable class
- *
- * @tparam T Type to create a reverse range on
- */
-template <typename T>
-class reverse_iterable
-{
-public:
- /** Default constructor
- *
- * @param[in] it Value to reverse iterate on
- */
- explicit reverse_iterable(T &it)
- : _it(it)
- {
- }
-
- /** Get beginning of iterator.
- *
- * @return beginning of iterator.
- */
- typename T::reverse_iterator begin()
- {
- return _it.rbegin();
- }
-
- /** Get end of iterator.
- *
- * @return end of iterator.
- */
- typename T::reverse_iterator end()
- {
- return _it.rend();
- }
-
- /** Get beginning of const iterator.
- *
- * @return beginning of const iterator.
- */
- typename T::const_reverse_iterator cbegin()
- {
- return _it.rbegin();
- }
-
- /** Get end of const iterator.
- *
- * @return end of const iterator.
- */
- typename T::const_reverse_iterator cend()
- {
- return _it.rend();
- }
-
-private:
- T &_it;
-};
-
-/** Creates a reverse iterable for a given type
- *
- * @tparam T Type to create a reverse iterable on
- *
- * @param[in] val Iterable input
- *
- * @return Reverse iterable container
- */
-template <typename T>
-reverse_iterable<T> reverse_iterate(T &val)
-{
- return reverse_iterable<T>(val);
-}
-} // namespace iterable
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_ITERABLE_H */
diff --git a/arm_compute/core/utils/misc/MMappedFile.h b/arm_compute/core/utils/misc/MMappedFile.h
index 7669c5cc96..3efdbc5bda 100644
--- a/arm_compute/core/utils/misc/MMappedFile.h
+++ b/arm_compute/core/utils/misc/MMappedFile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_MISC_MMAPPED_FILE_H
#define ARM_COMPUTE_MISC_MMAPPED_FILE_H
-#if !defined(BARE_METAL)
+#if !defined(_WIN64) && !defined(BARE_METAL)
#include <string>
#include <utility>
@@ -105,6 +105,6 @@ private:
} // namespace mmap_io
} // namespace utils
} // namespace arm_compute
-#endif // !defined(BARE_METAL)
+#endif // !defined(_WIN64) && !defined(BARE_METAL)
#endif /* ARM_COMPUTE_MISC_MMAPPED_FILE_H */
diff --git a/arm_compute/core/utils/misc/Macros.h b/arm_compute/core/utils/misc/Macros.h
index 6e8d7659ee..fa861fa442 100644
--- a/arm_compute/core/utils/misc/Macros.h
+++ b/arm_compute/core/utils/misc/Macros.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,15 +26,16 @@
#if defined(__cplusplus) && (__cplusplus >= 201402L)
-#define ARM_COMPUTE_DEPRECATED [[deprecated]]
-#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]]
+#define ARM_COMPUTE_DEPRECATED [[deprecated]]
+#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]]
#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) [[deprecated("Deprecated in : " #rel " - Use : " #replace)]]
#elif defined(__GNUC__) || defined(__clang__)
-#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated))
+#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated))
#define ARM_COMPUTE_DEPRECATED_REL(rel) __attribute__((deprecated("Deprecated in : " #rel)))
-#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace)))
+#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) \
+ __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace)))
#else // defined(__cplusplus) && (__cplusplus >= 201402L)
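Editor's note: a short illustration of the deprecation macros in use (function and release names are hypothetical):

    ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.11, new_entry_point)
    void old_entry_point();
    // A call site now warns: 'old_entry_point' is deprecated: Deprecated in : 20.11 - Use : new_entry_point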
diff --git a/arm_compute/core/utils/misc/Random.h b/arm_compute/core/utils/misc/Random.h
deleted file mode 100644
index 9f5a128546..0000000000
--- a/arm_compute/core/utils/misc/Random.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_RANDOM_H
-#define ARM_COMPUTE_MISC_RANDOM_H
-
-#include "arm_compute/core/Error.h"
-
-#include <random>
-#include <type_traits>
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace random
-{
-/** Uniform distribution within a given number of sub-ranges
- *
- * @tparam T Distribution primitive type
- */
-template <typename T>
-class RangedUniformDistribution
-{
-public:
- using DT = typename std::conditional<std::is_integral<T>::value,
- std::uniform_int_distribution<T>,
- std::uniform_real_distribution<float>>::type;
- using result_type = T;
- using range_pair = std::pair<result_type, result_type>;
-
-public:
- /** Constructor
- *
- * @param[in] low lowest value in the range (inclusive)
- * @param[in] high highest value in the range (inclusive for uniform_int_distribution, exclusive for uniform_real_distribution)
- * @param[in] exclude_ranges Ranges to exclude from the generator
- */
- RangedUniformDistribution(result_type low, result_type high, const std::vector<range_pair> &exclude_ranges)
- : _distributions(), _selector()
- {
- result_type clow = low;
- for(const auto &erange : exclude_ranges)
- {
- result_type epsilon = std::is_integral<result_type>::value ? 1 : static_cast<result_type>(std::numeric_limits<float>::epsilon());
-
- ARM_COMPUTE_ERROR_ON(clow > erange.first || clow >= erange.second);
-
- _distributions.emplace_back(DT(clow, erange.first - epsilon));
- clow = erange.second + epsilon;
- }
- ARM_COMPUTE_ERROR_ON(clow > high);
- _distributions.emplace_back(DT(clow, high));
- _selector = std::uniform_int_distribution<uint32_t>(0, _distributions.size() - 1);
- }
- /** Generate random number
- *
- * @tparam URNG Random number generator object type
- *
- * @param[in] g A uniform random number generator object, used as the source of randomness.
- *
- * @return A new random number.
- */
- template <class URNG>
- result_type operator()(URNG &g)
- {
- unsigned int rand_select = _selector(g);
- return _distributions[rand_select](g);
- }
-
-private:
- std::vector<DT> _distributions;
- std::uniform_int_distribution<uint32_t> _selector;
-};
-} // namespace random
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_RANDOM_H */
diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h
deleted file mode 100644
index 33c6fa3096..0000000000
--- a/arm_compute/core/utils/misc/Requires.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_REQUIRES_H
-#define ARM_COMPUTE_UTILS_REQUIRES_H
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace requires
-{
-// *INDENT-OFF*
-// clang-format off
-namespace detail
-{
-enum class enabler
-{
-};
-} // namespace arm_compute
-
-/** Requirements as template */
-#define REQUIRES_T(...) template <bool Cond = (__VA_ARGS__), typename std::enable_if<Cond, int>::type = 0>
-/** Requirements as template argument */
-#define REQUIRES_TA(...) typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type
-// clang-format on
-// *INDENT-ON*
-} // namespace requires
-} // namespace utils
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_UTILS_REQUIRES_H */
diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h
deleted file mode 100644
index 650137a473..0000000000
--- a/arm_compute/core/utils/misc/Rounding.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_ROUNDING_H
-#define ARM_COMPUTE_UTILS_ROUNDING_H
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/utils/misc/Requires.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "support/ToolchainSupport.h"
-
-#include <cmath>
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace rounding
-{
-/** Rounding mode */
-enum class RoundingMode
-{
- TO_ZERO, /**< Round towards zero */
- AWAY_FROM_ZERO, /**< Round away from zero */
- HALF_TO_ZERO, /**< Round half towards from zero */
- HALF_AWAY_FROM_ZERO, /**< Round half away from zero */
- HALF_UP, /**< Round half towards positive infinity */
- HALF_DOWN, /**< Round half towards negative infinity */
- HALF_EVEN /**< Round half towards nearest even */
-};
-
-/** Round floating-point value with round to zero
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_to_zero(T value)
-{
- T res = std::floor(std::fabs(value));
- return (value < 0.f) ? -res : res;
-}
-
-/** Round floating-point value with round away from zero
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_away_from_zero(T value)
-{
- T res = std::ceil(std::fabs(value));
- return (value < 0.f) ? -res : res;
-}
-
-/** Round floating-point value with half value rounding towards zero.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_to_zero(T value)
-{
- T res = T(std::ceil(std::fabs(value) - 0.5f));
- return (value < 0.f) ? -res : res;
-}
-
-/** Round floating-point value with half value rounding away from zero.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_away_from_zero(T value)
-{
- T res = T(std::floor(std::fabs(value) + 0.5f));
- return (value < 0.f) ? -res : res;
-}
-
-/** Round floating-point value with half value rounding to positive infinity.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_up(T value)
-{
- return std::floor(value + 0.5f);
-}
-
-/** Round floating-point value with half value rounding to negative infinity.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_down(T value)
-{
- return std::ceil(value - 0.5f);
-}
-
-/** Round floating-point value with half value rounding to nearest even.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- * @param[in] epsilon (Optional) Tolerance used to detect values exactly halfway between two integers.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon())
-{
- T positive_value = std::abs(value);
- T ipart = 0;
- std::modf(positive_value, &ipart);
- // If 'value' is exactly halfway between two integers
- if(std::abs(positive_value - (ipart + 0.5f)) < epsilon)
- {
- // If 'ipart' is even then return 'ipart'
- if(std::fmod(ipart, 2.f) < epsilon)
- {
- return support::cpp11::copysign(ipart, value);
- }
- // Else return the nearest even integer
- return support::cpp11::copysign(std::ceil(ipart + 0.5f), value);
- }
- // Otherwise use the usual round to closest
- return support::cpp11::copysign(support::cpp11::round(positive_value), value);
-}
-
-/** Round floating-point value given a rounding mode
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- * @param[in] rounding_mode Rounding mode to use.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round(T value, RoundingMode rounding_mode)
-{
- switch(rounding_mode)
- {
- case RoundingMode::TO_ZERO:
- return round_to_zero(value);
- case RoundingMode::AWAY_FROM_ZERO:
- return round_away_from_zero(value);
- case RoundingMode::HALF_TO_ZERO:
- return round_half_to_zero(value);
- case RoundingMode::HALF_AWAY_FROM_ZERO:
- return round_half_away_from_zero(value);
- case RoundingMode::HALF_UP:
- return round_half_up(value);
- case RoundingMode::HALF_DOWN:
- return round_half_down(value);
- case RoundingMode::HALF_EVEN:
- return round_half_even(value);
- default:
- ARM_COMPUTE_ERROR("Unsupported rounding mode!");
- }
-}
-} // namespace rounding
-} // namespace utils
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_UTILS_ROUNDING_H */
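Note: as a quick sanity check on the semantics being deleted, here is a minimal standalone sketch of the half-away-from-zero and half-even behaviour using plain <cmath> (assumed equivalent, not the removed helpers themselves):

#include <cassert>
#include <cmath>

int main()
{
    // Half away from zero: floor(|v| + 0.5), with the sign restored afterwards.
    float v   = -2.5f;
    float res = std::floor(std::fabs(v) + 0.5f);
    assert(((v < 0.f) ? -res : res) == -3.0f);

    // Half to even picks the even neighbour on exact ties: 2.5 -> 2, 3.5 -> 4.
    // std::nearbyint honours the current rounding mode, FE_TONEAREST by default.
    assert(std::nearbyint(2.5f) == 2.0f && std::nearbyint(3.5f) == 4.0f);
    return 0;
}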
diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h
deleted file mode 100644
index 0241c64b14..0000000000
--- a/arm_compute/core/utils/misc/SaturateCast.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H
-#define ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H
-
-#include "arm_compute/core/utils/misc/Rounding.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "arm_compute/core/utils/misc/Utility.h"
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace cast
-{
-// *INDENT-OFF*
-// clang-format off
-// same type
-template<typename T,
- typename U,
- typename std::enable_if<std::is_same<T, U>::value, int >::type = 0 >
-T saturate_cast(U v)
-{
- return v;
-}
-
-// signed -> signed widening/same_width
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_signed<U>() &&
- std::is_signed<T>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) >= sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(v);
-}
-// signed -> signed narrowing
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_signed<U>() &&
- std::is_signed<T>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) < sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max()));
-}
-
-// unsigned -> signed widening
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_unsigned<U>() &&
- std::is_signed<T>() &&
- !std::is_same<T, U>::value &&
- (sizeof(T) > sizeof(U)),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(v);
-}
-// unsigned -> signed narrowing
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_unsigned<U>() &&
- std::is_signed<T>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) < sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max()));
-}
-// unsigned -> signed same_width
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_unsigned<U>() &&
- std::is_signed<T>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) == sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max()));
-}
-
-// signed -> unsigned widening/same width
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_signed<U>() &&
- std::is_unsigned<T>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) >= sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(std::max<U>(0, v));
-}
-
-// signed -> unsigned narrowing
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_signed<U>() &&
- std::is_unsigned<T>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) < sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(utility::clamp<U>(v, 0, std::numeric_limits<T>::max()));
-}
-
-// unsigned -> unsigned widening/same width
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_unsigned<T>() &&
- std::is_unsigned<U>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) >= sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(v);
-}
-
-// unsigned -> unsigned narrowing
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- std::is_integral<U>::value &&
- std::is_unsigned<T>() &&
- std::is_unsigned<U>() &&
- !std::is_same<T, U>::value &&
- sizeof(T) < sizeof(U),
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max()));
-}
-
-// float -> int
-template<typename T,
- typename U,
- typename std::enable_if<std::is_integral<T>::value &&
- traits::is_floating_point<U>::value,
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- int32_t vi = utils::rounding::round_half_away_from_zero(v);
- return saturate_cast<T>(vi);
-}
-
-// int -> float
-template<typename T,
- typename U,
- typename std::enable_if<traits::is_floating_point<T>::value &&
- std::is_integral<U>::value,
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(v);
-}
-
-// float -> float
-template<typename T,
- typename U,
- typename std::enable_if<traits::is_floating_point<T>::value &&
- traits::is_floating_point<U>::value,
- int >::type = 0 >
-inline T saturate_cast(U v)
-{
- return static_cast<T>(v);
-}
-// clang-format on
-// *INDENT-ON*
-} // namespace cast
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H */
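Note: the overload set removed above reduces to clamp-then-cast. A minimal standalone sketch of the narrowing cases, assuming equivalent semantics (plain STL, not the removed code):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main()
{
    // signed -> signed narrowing: clamp into the target range, then cast.
    int32_t v = 300;
    int8_t  s = static_cast<int8_t>(
        std::min<int32_t>(std::max<int32_t>(v, INT8_MIN), INT8_MAX));
    assert(s == 127); // 300 saturates to int8_t's maximum

    // signed -> unsigned: negative values clamp to zero first.
    uint8_t u = static_cast<uint8_t>(std::max<int32_t>(0, -5));
    assert(u == 0);
    return 0;
}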
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index dfccec8b37..e97d81390e 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,15 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H
-#define ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H
+#ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
+#define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Utils.h"
-
#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "arm_compute/function_info/ConvolutionInfo.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
#include <cmath>
@@ -47,28 +48,35 @@ namespace shape_calculator
*
* @return the calculated shape
*/
-inline TensorShape calculate_reduce_mean_shape(ITensor *input, const Coordinates &reduction_axis, bool keep_dims)
+inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims)
{
const int reduction_ops = reduction_axis.num_dimensions();
Coordinates axis_local = reduction_axis;
- const int input_dims = input->info()->num_dimensions();
+ const int input_dims = input->num_dimensions();
convert_negative_axis(axis_local, input_dims);
- TensorShape out_shape = input->info()->tensor_shape();
+ TensorShape out_shape = input->tensor_shape();
// Configure reshape layer if we want to drop the dimensions
- if(!keep_dims)
+ if (!keep_dims)
{
// We have to sort the reduction axis vectors in order for remove_dimension
// to work properly
+
+// Suppress warning produced by a compiler bug in GCC
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104165
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
- for(int i = 0; i < reduction_ops; ++i)
+#pragma GCC diagnostic pop
+
+ for (int i = 0; i < reduction_ops; ++i)
{
- out_shape.remove_dimension(axis_local[i] - i);
+ out_shape.remove_dimension(axis_local[i] - i, false);
}
return out_shape;
}
else
{
- for(int i = 0; i < reduction_ops; ++i)
+ for (int i = 0; i < reduction_ops; ++i)
{
out_shape.set(axis_local[i], 1);
}
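Note: a plain-vector sketch of the shape logic in this hunk (the helper name and container types are illustrative, not the library API):

#include <algorithm>
#include <cstddef>
#include <vector>

// reduce_mean_shape({4,3,2}, {1}, false) -> {4,2}; keep_dims=true -> {4,1,2}
std::vector<size_t> reduce_mean_shape(std::vector<size_t> shape, std::vector<int> axes, bool keep_dims)
{
    std::sort(axes.begin(), axes.end()); // required so the erase offsets below stay valid
    for (size_t i = 0; i < axes.size(); ++i)
    {
        if (keep_dims)
        {
            shape[axes[i]] = 1; // keep the rank, collapse the extent to 1
        }
        else
        {
            shape.erase(shape.begin() + axes[i] - i); // each erase shifts later axes left by one
        }
    }
    return shape;
}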
@@ -84,7 +92,10 @@ inline TensorShape calculate_reduce_mean_shape(ITensor *input, const Coordinates
*
* @return the calculated shape
*/
-inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout)
+inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input,
+ size_t conv_w,
+ size_t conv_h,
+ const DataLayout &data_layout)
{
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
@@ -126,10 +137,12 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t
const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_ERROR_ON(stride <= 0);
- ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride");
- ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride");
+ ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0),
+ "The width of the input tensor must be a multiple of stride");
+ ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0),
+ "The height of the input tensor must be a multiple of stride");
- TensorShape output_shape{ input.tensor_shape() };
+ TensorShape output_shape{input.tensor_shape()};
output_shape.set(idx_width, output_shape[idx_width] / stride);
output_shape.set(idx_height, output_shape[idx_height] / stride);
@@ -146,7 +159,8 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t
*
* @return the calculated shape of the reshaped weights
*/
-inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
+inline TensorShape
+compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
{
    // A number of groups greater than one is only supported for the NCHW data layout, and the number of weights must be a multiple of it.
ARM_COMPUTE_ERROR_ON(num_groups == 0);
@@ -154,14 +168,14 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo
ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0);
// Calculate output shape
- TensorShape weights_reshaped{ weights.tensor_shape() };
+ TensorShape weights_reshaped{weights.tensor_shape()};
weights_reshaped.set(3, weights_reshaped[3] / num_groups);
weights_reshaped.collapse(3);
const size_t tmp_dim = weights_reshaped[0];
weights_reshaped.set(0, weights_reshaped[1]);
weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0));
- if(weights.num_dimensions() < 5)
+ if (weights.num_dimensions() < 5)
{
weights_reshaped.set(2, num_groups);
}
@@ -177,7 +191,9 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo
*
* @return the calculated shape
*/
-inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false)
+inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a,
+ const GEMMLHSMatrixInfo &lhs_info,
+ bool reinterpret_input_as_3d = false)
{
ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0);
ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0);
@@ -198,11 +214,11 @@ inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLH
const unsigned int output_width = block_size * num_horiz_blocks * lhs_info.v0;
const unsigned int output_height = std::ceil(num_vert_blocks / static_cast<float>(lhs_info.v0));
- TensorShape lhs_shape{ a.tensor_shape() };
+ TensorShape lhs_shape{a.tensor_shape()};
lhs_shape.set(0, output_width);
lhs_shape.set(1, output_height);
- if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2))
+ if ((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2))
{
// When the data format is NHWC and the shapes are Nx1x1
// the tensor shape num_dimensions is automatically set to 1 instead of 3.
@@ -242,7 +258,7 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH
const unsigned int output_width = block_size * num_vert_blocks * rhs_info.h0;
const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast<float>(rhs_info.h0));
- TensorShape rhs_shape{ a.tensor_shape() };
+ TensorShape rhs_shape{a.tensor_shape()};
rhs_shape.set(0, output_width);
rhs_shape.set(1, output_height);
@@ -257,14 +273,15 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH
*
* @return the calculated shape
*/
-inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false)
+inline TensorShape
+compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false)
{
// The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height
ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1);
const int interleave_width = 4 * mult_interleave4x4_height;
- TensorShape shape_interleaved_a{ a.tensor_shape() };
+ TensorShape shape_interleaved_a{a.tensor_shape()};
shape_interleaved_a.set(0, a.dimension(0) * interleave_width);
- if(reinterpret_input_as_3d)
+ if (reinterpret_input_as_3d)
{
const int M = a.dimension(1) * a.dimension(2);
const int height = std::ceil(M / static_cast<float>(interleave_width));
@@ -274,7 +291,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte
// the tensor shape num_dimensions is automatically set to 1 instead of 3.
// To avoid failures by removing a dimension that doesn't exist
// check if the number of dimensions is greater than 2.
- if(shape_interleaved_a.num_dimensions() > 2)
+ if (shape_interleaved_a.num_dimensions() > 2)
{
shape_interleaved_a.remove_dimension(2);
}
@@ -287,30 +304,6 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte
return shape_interleaved_a;
}
-/** Calculate the reshaped shape of the weights to use in depthwise convolution
- *
- * @param[in] input Input tensor info
- * @param[in] info Depthwise convolution information to be used for reshaping.
- *
- * @return the calculated shape
- */
-inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &input, const DepthwiseConvolutionReshapeInfo &info)
-{
- const auto data_layout = input.data_layout();
- TensorShape weights_shape{};
-
- const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const size_t num_channels = input.dimension(channel_idx);
- const size_t num_rows = input.dimension(height_idx);
- const size_t num_cols = input.dimension(width_idx);
-
- weights_shape.set(0, num_rows * num_cols * info.c0);
- weights_shape.set(1, DIV_CEIL(num_channels, info.c0));
- return weights_shape;
-}
-
/** Calculate the transposed 1xW shape
*
* @param[in] b Input tensor info
@@ -320,7 +313,7 @@ inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &i
inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b)
{
// The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
- TensorShape shape_transposed1xW_b{ b.tensor_shape() };
+ TensorShape shape_transposed1xW_b{b.tensor_shape()};
shape_transposed1xW_b.set(0, b.dimension(1) * 16);
shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f));
@@ -340,7 +333,7 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf
// The transpose1xW output matrix will have the following shape:
// [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width
ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1);
- TensorShape shape_transposed1xW_b{ b.tensor_shape() };
+ TensorShape shape_transposed1xW_b{b.tensor_shape()};
const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width;
shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width);
shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width))));
@@ -356,8 +349,8 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf
*/
inline TensorShape compute_reductionA_shape(const ITensorInfo &b)
{
- TensorShape shape_vector_sum_col{ b.tensor_shape() };
- if(shape_vector_sum_col.num_dimensions() > 1)
+ TensorShape shape_vector_sum_col{b.tensor_shape()};
+ if (shape_vector_sum_col.num_dimensions() > 1)
{
shape_vector_sum_col.remove_dimension(1);
}
@@ -373,9 +366,9 @@ inline TensorShape compute_reductionA_shape(const ITensorInfo &b)
*/
inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
{
- TensorShape shape_vector_sum_row{ a.tensor_shape() };
+ TensorShape shape_vector_sum_row{a.tensor_shape()};
shape_vector_sum_row.set(Window::DimX, a.dimension(1));
- if(shape_vector_sum_row.num_dimensions() > 1)
+ if (shape_vector_sum_row.num_dimensions() > 1)
{
shape_vector_sum_row.remove_dimension(1);
}
@@ -392,7 +385,10 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
*
* @return the calculated shape
*/
-inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1)
+inline TensorShape compute_col2im_shape(const ITensorInfo &input,
+ const Size2D &convolved_dims,
+ bool batch_size_on_z,
+ unsigned int num_groups = 1)
{
ARM_COMPUTE_ERROR_ON(num_groups == 0);
ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area()));
@@ -403,10 +399,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- TensorShape col2im_shape{ input.tensor_shape() };
+ TensorShape col2im_shape{input.tensor_shape()};
    // If batches start on the 3rd dimension, shift dimensions right by 1 to retain the upper tensor shape,
    // as the first three will be overridden by H,W,C data
- if(batch_size_on_z && num_groups == 1)
+ if (batch_size_on_z && num_groups == 1)
{
col2im_shape.shift_right(1);
}
@@ -425,29 +421,27 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &
*/
inline TensorShape compute_transposed_shape(const ITensorInfo &input)
{
- TensorShape shape_transposed{ input.tensor_shape() };
+ TensorShape shape_transposed{input.tensor_shape()};
- shape_transposed.set(0, input.dimension(1));
- shape_transposed.set(1, input.dimension(0));
+ shape_transposed.set(0, input.dimension(1), false);
+ shape_transposed.set(1, input.dimension(0), false);
return shape_transposed;
}
/** Calculate the depthwise convolution output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] weights Weights tensor info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.
- * @param[in] dilation Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor info
+ * @param[in] info Convolution info
*
* @return the calculated shape
*/
-inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U,
- 1U))
+inline TensorShape
+compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
{
- const TensorShape input_shape{ input.tensor_shape() };
- const TensorShape weights_shape{ weights.tensor_shape() };
+ const TensorShape input_shape{input.tensor_shape()};
+ const TensorShape weights_shape{weights.tensor_shape()};
const DataLayout data_layout = input.data_layout();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -455,23 +449,54 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const DataLayout weights_data_layout = weights.data_layout();
- const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
- const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);
+ const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
+ const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);
unsigned int output_width = 0;
unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
- weights_shape[weights_width_idx], weights_shape[weights_height_idx],
- conv_info, dilation);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_shape[width_idx], input_shape[height_idx], weights_shape[weights_width_idx],
+ weights_shape[weights_height_idx], info.pad_stride_info, info.dilation);
- TensorShape output_shape{ input_shape };
+ TensorShape output_shape{input_shape};
output_shape.set(width_idx, output_width);
output_shape.set(height_idx, output_height);
- output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier);
+ output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier);
return output_shape;
}
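Note: for reference, the arithmetic that scaled_dimensions() applies here, assuming FLOOR rounding and hypothetical sizes:

#include <cassert>

int main()
{
    const int in_w = 112, kernel = 3, stride = 2, pad = 1, dilation = 1;
    const int eff_k = dilation * (kernel - 1) + 1;           // dilated kernel extent
    const int out_w = (in_w + 2 * pad - eff_k) / stride + 1; // floor division -> 56
    const int in_c = 32, depth_multiplier = 2;
    assert(out_w == 56 && in_c * depth_multiplier == 64);    // channels scale by the multiplier
    return 0;
}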
+/** Calculate padding required for deconvolution
+ *
+ * @param[in] input Input tensor info
+ * @param[in] weights  Weights tensor info
+ * @param[in] sx Stride on x axis
+ * @param[in] sy Stride on y axis
+ * @param[in] out_dims Output shape dimensions
+ *
+ * @return the padding required
+ */
+inline std::pair<int32_t, int32_t> compute_deconvolution_padding(const ITensorInfo &input,
+ const ITensorInfo &weights,
+ int32_t sx,
+ int32_t sy,
+ std::pair<uint32_t, uint32_t> out_dims)
+{
+ const DataLayout data_layout = input.data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ // Find the upsampled dimensions
+ int32_t out_x = (static_cast<int32_t>(input.dimension(idx_w)) - 1) * sx + 1;
+ int32_t out_y = (static_cast<int32_t>(input.dimension(idx_h)) - 1) * sy + 1;
+
+ // Find the padding needed for the convolution with stride 1 in order to match output shape
+ int32_t padx = out_dims.first - (out_x - static_cast<int32_t>(weights.dimension(idx_w)) + 1);
+ int32_t pady = out_dims.second - (out_y - static_cast<int32_t>(weights.dimension(idx_h)) + 1);
+
+ return std::make_pair(padx, pady);
+}
+
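Note: worked numbers for the padding formula above (sizes are hypothetical): a 4-wide input upsampled with stride 2 becomes 7 wide, and matching an 8-wide target output with a 3-wide kernel needs 3 columns of padding in total.

#include <cassert>
#include <cstdint>

int main()
{
    const int32_t in_w = 4, sx = 2, kw = 3, target_out_w = 8;
    const int32_t up_w = (in_w - 1) * sx + 1;            // upsampled width: 7
    const int32_t padx = target_out_w - (up_w - kw + 1); // 8 - 5 = 3
    assert(padx == 3);
    return 0;
}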
/** Calculate the upsampled output shape used for deconvolution
*
* @param[in] input Input tensor info
@@ -484,20 +509,28 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
*
* @return the calculated shape
*/
-inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy,
- std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady)
+inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input,
+ const ITensorInfo &weights,
+ unsigned int sx,
+ unsigned int sy,
+ std::pair<unsigned int, unsigned int> &out_dims,
+ uint32_t &padx,
+ uint32_t &pady)
{
+ // Find the padding needed for the convolution with stride 1 in order to match output shape
+ const auto padxy =
+ compute_deconvolution_padding(input, weights, static_cast<int32_t>(sx), static_cast<int32_t>(sy), out_dims);
+ padx = static_cast<uint32_t>(padxy.first);
+ pady = static_cast<uint32_t>(padxy.second);
+
const DataLayout data_layout = input.data_layout();
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
// Find the upsampled dimensions
- unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
- unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;
+ uint32_t out_x = (input.dimension(idx_w) - 1) * sx + 1;
+ uint32_t out_y = (input.dimension(idx_h) - 1) * sy + 1;
- // Find the padding needed for the convolution with stride 1 in order to match output shape
- padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
- pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
out_x += padx;
out_y += pady;
@@ -516,10 +549,12 @@ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &inpu
*
* @return the calculated shape
*/
-inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
+inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims,
+ const ITensorInfo &input,
+ const ITensorInfo &weights)
{
- const TensorShape input_shape{ input.tensor_shape() };
- const TensorShape weights_shape{ weights.tensor_shape() };
+ const TensorShape input_shape{input.tensor_shape()};
+ const TensorShape weights_shape{weights.tensor_shape()};
const DataLayout data_layout = input.data_layout();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -527,7 +562,7 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
- TensorShape out_shape{ input_shape };
+ TensorShape out_shape{input_shape};
out_shape.set(width_idx, out_dims.first);
out_shape.set(height_idx, out_dims.second);
out_shape.set(channel_idx, weights_shape[batch_idx]);
@@ -543,11 +578,18 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i
* @param[in] dilation Dilation, in elements, across x and y
* @param[in] batch_size_on_z True if batch size is on z axis
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ * @param[in] input_pad_right (Optional) When fast-math is selected, per-element padding for the im2col matrix may be necessary
*
* @return the calculated shape
*/
-inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z,
- unsigned int num_groups = 1)
+inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input,
+ const Size2D &kernel_dims,
+ const PadStrideInfo &conv_info,
+ bool has_bias,
+ const Size2D &dilation,
+ bool batch_size_on_z,
+ unsigned int num_groups = 1,
+ unsigned int input_pad_right = 0)
{
// The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ] if batch_size_on_z == true
// or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ] if batch_size_on_z == false
@@ -556,17 +598,19 @@ inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Siz
ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW);
ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z);
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
const DataLayout data_layout = input->data_layout();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
- output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT
+ std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(
+ output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
+ output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() +
+ (has_bias ? 1 : 0))); // NOLINT
output_shape.set(1, (out_dims.first * out_dims.second));
- if(batch_size_on_z && output_shape.num_dimensions() >= 3)
+ if (batch_size_on_z && output_shape.num_dimensions() >= 3)
{
output_shape.remove_dimension(2);
}
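Note: worked numbers for the two dimensions set above, assuming hypothetical sizes with no grouping, no bias and no fast-math padding:

#include <cassert>

int main()
{
    const unsigned c = 16, kw = 3, kh = 3, out_w = 8, out_h = 8;
    const unsigned num_groups = 1, input_pad_right = 0;
    const bool     has_bias = false;
    const unsigned dim0 = (c + input_pad_right) / num_groups * (kw * kh) + (has_bias ? 1 : 0); // 144
    const unsigned dim1 = out_w * out_h;                                                       // 64
    assert(dim0 == 144 && dim1 == 64);
    return 0;
}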
@@ -588,7 +632,7 @@ inline TensorShape compute_flatten_shape(const ITensorInfo *input)
{
    // The output shape will be the flattened version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer.
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
output_shape.collapse(3);
@@ -610,7 +654,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis =
// - [x,y,z,w] and axis 3 will return [x*y*z, w]
TensorShape shape2D = input->tensor_shape();
- if(axis < input->num_dimensions())
+ if (axis < input->num_dimensions())
{
// Collapse from axis onward (this changes the shape)
shape2D.collapse_from(axis);
@@ -624,7 +668,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis =
shape2D.collapse(shape2D.num_dimensions());
}
- if(axis == 0)
+ if (axis == 0)
{
// If axis is zero the first dim should be one. Since
// collapse is an inclusive operation we need to shift
@@ -643,15 +687,17 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis =
*/
inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
{
- TensorShape tensor_shape{ input.tensor_shape() };
+ TensorShape tensor_shape{input.tensor_shape()};
const Size2D kernel_size = winograd_info.kernel_size;
const Size2D output_tile_size = winograd_info.output_tile_size;
- const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
+ const Size2D input_tile_size =
+ Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
tensor_shape.set(Window::DimX, input.dimension(3));
- tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)));
+ tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(),
+ DataLayoutDimension::CHANNEL)));
tensor_shape.set(Window::DimZ, input_tile_size.area());
return tensor_shape;
@@ -669,23 +715,22 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp
const PadStrideInfo conv_info = winograd_info.convolution_info;
const Size2D kernel_size = winograd_info.kernel_size;
const Size2D output_tile_size = winograd_info.output_tile_size;
- const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
+ const Size2D input_tile_size =
+ Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
// Compute the number of output tiles along the x and y direction of size "output_tile_size"
- const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]),
- kernel_size,
- output_tile_size,
- conv_info);
+ const Size2D num_tiles = compute_winograd_convolution_tiles(
+ Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), kernel_size, output_tile_size, conv_info);
const unsigned int width = input.tensor_shape()[idx_c];
const unsigned int height = num_tiles.area();
const unsigned int depth = input_tile_size.area();
- TensorShape output_shape{ input.tensor_shape() };
+ TensorShape output_shape{input.tensor_shape()};
output_shape.set(0, width);
output_shape.set(1, height);
output_shape.set(2, depth);
@@ -708,12 +753,12 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in
const DataLayout data_layout = winograd_info.output_data_layout;
// Compute output shape
- unsigned int output_width = 0;
- unsigned int output_height = 0;
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
kernel_size.width, kernel_size.height, conv_info);
- TensorShape tensor_shape{ input.tensor_shape() };
+ TensorShape tensor_shape{input.tensor_shape()};
// Output dimension
const unsigned int out_w = output_width;
@@ -729,20 +774,21 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in
 /** Calculate the deep convolution output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] weights Weights tensor info
- * @param[in] conv_info Contains padding and stride information
+ * @param[in] input_shape Input tensor shape
+ * @param[in] input_data_layout Input data layout
+ * @param[in] weights_shape Weights tensor shape
+ * @param[in] conv_info Contains padding and stride information
*
* @return the calculated shape
*/
-inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
+inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape,
+ DataLayout input_data_layout,
+ const TensorShape &weights_shape,
+ const PadStrideInfo &conv_info)
{
- const TensorShape input_shape{ input.tensor_shape() };
- const TensorShape weights_shape{ weights.tensor_shape() };
-
- const size_t idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
- const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
+ const size_t idx_width = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_height = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_channel = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::CHANNEL);
const unsigned int input_width = input_shape[idx_width];
const unsigned int input_height = input_shape[idx_height];
@@ -751,9 +797,10 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons
const unsigned int weights_out_channel = weights_shape[3];
unsigned int output_width = 0;
unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
- TensorShape output_shape{ input_shape };
+ TensorShape output_shape{input_shape};
output_shape.set(idx_width, output_width);
output_shape.set(idx_height, output_height);
output_shape.set(idx_channel, weights_out_channel);
@@ -761,6 +808,53 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons
return output_shape;
}
+/** Calculate the deep convolution output shape of a tensor
+ *
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor info
+ * @param[in] conv_info Contains padding and stride information
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info)
+{
+ return compute_deep_convolution_shape(input.tensor_shape(), input.data_layout(), weights.tensor_shape(), conv_info);
+}
+
+/** Calculate the indirect buffer output shape used by the indirect convolution function
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] input_data_layout Input data layout
+ * @param[in] weights_shape Weights tensor shape
+ * @param[in] conv_info Contains padding and stride information
+ * @param[in] desc Contains the direct/indirect convolution compute arguments, such as the tiling dimensions
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape,
+ DataLayout input_data_layout,
+ const TensorShape &weights_shape,
+ const PadStrideInfo &conv_info,
+ const DirectConvComputeKernelInfo &desc)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(input_data_layout != DataLayout::NHWC, "The data layout can only be NHWC");
+ ARM_COMPUTE_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8, "M0 can only be greater than 0 and less than or equal to 8");
+
+ const unsigned int m0 = desc.m0;
+ const unsigned int kw = weights_shape[1];
+ const unsigned int kh = weights_shape[2];
+
+ TensorShape output_conv2d_shape =
+ compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info);
+
+ const unsigned int output_w = m0 * kw * kh;
+ const unsigned int output_h = DIV_CEIL(output_conv2d_shape[1] * output_conv2d_shape[2], m0);
+ const unsigned int output_b = output_conv2d_shape[3];
+
+ return TensorShape(output_w, output_h, output_b);
+}
+
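Note: worked numbers for the three dimensions returned above, assuming hypothetical NHWC sizes:

#include <cassert>

int main()
{
    const unsigned m0 = 4, kw = 3, kh = 3;
    const unsigned conv_w = 16, conv_h = 16, batches = 1;   // spatial dims of the conv2d output
    const unsigned out_w = m0 * kw * kh;                    // 36
    const unsigned out_h = (conv_w * conv_h + m0 - 1) / m0; // DIV_CEIL(256, 4) -> 64
    assert(out_w == 36 && out_h == 64 && batches == 1);
    return 0;
}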
/** Calculate the min/max shape output shape of a tensor
*
* @param[in] input Input tensor info
@@ -769,7 +863,7 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons
*/
inline TensorShape compute_min_max_shape(const ITensorInfo *input)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
output_shape.set(Window::DimX, 2);
output_shape.remove_dimension(1);
output_shape.remove_dimension(1);
@@ -786,29 +880,63 @@ inline TensorShape compute_min_max_shape(const ITensorInfo *input)
*/
inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
{
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
+ int pooled_w = 0;
+ int pooled_h = 0;
+
+ TensorShape output_shape{input.tensor_shape()};
- TensorShape output_shape{ input.tensor_shape() };
+ const bool is_global_pooling = pool_info.is_global_pooling;
+ const int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
+ const int input_width = input.tensor_shape()[idx_width];
+ const int input_height = input.tensor_shape()[idx_height];
+ const int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width;
+ const int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height;
- const bool is_global_pooling = pool_info.is_global_pooling;
- const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
- const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
- const unsigned int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width;
- const unsigned int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height;
+ std::tie(pooled_w, pooled_h) =
+ scaled_dimensions_signed(input_width, input_height, pool_size_x, pool_size_y, pool_info.pad_stride_info);
- std::tie(pooled_w, pooled_h) = scaled_dimensions(output_shape[idx_width],
- output_shape[idx_height],
- pool_size_x,
- pool_size_y,
- pool_info.pad_stride_info);
+ ARM_COMPUTE_ERROR_ON_MSG((pooled_w < 1 || pooled_h < 1), "Calculated output dimension size is invalid");
- output_shape.set(idx_width, pooled_w);
- output_shape.set(idx_height, pooled_h);
+ output_shape.set(idx_width, static_cast<size_t>(pooled_w));
+ output_shape.set(idx_height, static_cast<size_t>(pooled_h));
return output_shape;
}
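Note: for reference, the arithmetic scaled_dimensions_signed() is assumed to apply here with FLOOR rounding and no padding:

#include <cassert>

int main()
{
    const int in_w = 7, pool = 3, stride = 2, pad_l = 0, pad_r = 0;
    const int pooled_w = (in_w + pad_l + pad_r - pool) / stride + 1; // floor division -> 3
    assert(pooled_w == 3);
    return 0;
}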
+/** Calculate the output unpool shape of a tensor
+ *
+ * @param[in] input Input tensor info
+ * @param[in] pool_info Pooling layer info
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
+{
+ const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
+ const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
+ const TensorShape input_shape = input.tensor_shape();
+ ARM_COMPUTE_ERROR_ON(input_shape[idx_height] <= 1 || input_shape[idx_width] <= 1);
+ const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
+ const unsigned int stride_x = pad_stride_info.stride().first;
+ const unsigned int stride_y = pad_stride_info.stride().second;
+
+ const int pad_left = pad_stride_info.pad_left();
+ const int pad_top = pad_stride_info.pad_top();
+ const int pad_right = pad_stride_info.pad_right();
+ const int pad_bottom = pad_stride_info.pad_bottom();
+
+ TensorShape output_shape = input_shape;
+ const unsigned int out_width =
+ (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width;
+ const unsigned int out_height =
+ (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height;
+
+ output_shape.set(idx_width, out_width);
+ output_shape.set(idx_height, out_height);
+ return output_shape;
+}
+
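Note: the unpooled extent is the pooling formula inverted; with the numbers from the pooling sketch above, a 3-wide map expands back to 7:

#include <cassert>

int main()
{
    const int in_w = 3, stride = 2, pool = 3, pad = 0;
    const int out_w = (in_w - 1) * stride - 2 * pad + pool; // inverts the pooling formula -> 7
    assert(out_w == 7);
    return 0;
}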
/** Calculate the output roi align shape of a tensor
*
* @param[in] input Input tensor info
@@ -817,9 +945,10 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo
*
* @return the calculated shape
*/
-inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info)
+inline TensorShape
+compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info)
{
- TensorShape output_shape{ input.tensor_shape() };
+ TensorShape output_shape{input.tensor_shape()};
const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
@@ -840,7 +969,7 @@ inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITens
*/
inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
output_shape.set(1, batch_size);
return output_shape;
@@ -855,15 +984,21 @@ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned in
*
* @return the calculated shape
*/
-inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
+inline TensorShape compute_mm_shape(const ITensorInfo &input0,
+ const ITensorInfo &input1,
+ bool is_interleaved_transposed,
+ const GEMMReshapeInfo &reshape_info)
{
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
- ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true");
+ ARM_COMPUTE_ERROR_ON_MSG(
+ is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(),
+ "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true");
const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0;
const int depth_output_gemm3d = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1;
- const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1);
+ const int m =
+ reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1);
// If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
// dimension of the output tensor
@@ -872,7 +1007,7 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3];
- TensorShape output_shape{ input0.tensor_shape() };
+ TensorShape output_shape{input0.tensor_shape()};
output_shape.set(0, dim0);
output_shape.set(1, dim1);
@@ -885,15 +1020,14 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
/** Calculate the matrix multiplication output shape of two tensors
*
- * @note Deprecated. Remove when GEMMReshapeInfo is not used anymore by any other kernels
- *
* @param[in] input0 First input tensor info
* @param[in] input1 Second input tensor info
* @param[in] gemm_info GEMM reshape info
*
* @return the calculated shape
*/
-inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
+inline TensorShape
+compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
@@ -902,9 +1036,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0;
const int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1;
- TensorShape output_shape{ input0.tensor_shape() };
+ TensorShape output_shape{input0.tensor_shape()};
- if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
+ if (!reinterpret_input_as_3d && !reinterpret_output_as_3d)
{
output_shape.set(0, gemm_info.n());
output_shape.set(1, gemm_info.m());
@@ -931,7 +1065,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
*
* @return the calculated shape
*/
-inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info)
+inline TensorShape
+compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
@@ -940,9 +1075,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0;
const unsigned int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d : 1;
- TensorShape output_shape{ input0.tensor_shape() };
+ TensorShape output_shape{input0.tensor_shape()};
- if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
+ if (!reinterpret_input_as_3d && !reinterpret_output_as_3d)
{
output_shape.set(0, gemm_info.n);
output_shape.set(1, gemm_info.m);
@@ -963,20 +1098,50 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
/** Calculate the matrix multiplication output shape of two tensors
*
+ * @param[in] input0 First input tensor info
+ * @param[in] input1 Second input tensor info
+ * @param[in] matmul_info Batch MatMul Kernel info to know which matrix is transposed
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info)
+{
+ TensorShape output_shape{input0};
+
+ if (matmul_info.adj_lhs)
+ {
+ output_shape.set(1, input0[0]); // The vertical (M) dimension
+ }
+
+ if (matmul_info.adj_rhs)
+ {
+ output_shape.set(0, input1[1]); // The horizontal (N) dimension
+ }
+ else
+ {
+ output_shape.set(0, input1[0]); // The horizontal (N) dimension
+ }
+
+ return output_shape;
+}
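Note: a shape-only sketch of the adjoint handling above, assuming dim 0 is the fastest (N) axis as in the rest of this header:

#include <cassert>

int main()
{
    // Hypothetical 2D shapes, [dim0, dim1]: LHS = [K=8, M=4], RHS = [N=6, K=8].
    const bool     adj_lhs = false, adj_rhs = false;
    const unsigned input0[2] = {8, 4};
    const unsigned input1[2] = {6, 8};

    unsigned out[2] = {input0[0], input0[1]};
    if (adj_lhs)
    {
        out[1] = input0[0]; // M sits in dim 0 of a transposed LHS
    }
    out[0] = adj_rhs ? input1[1] : input1[0]; // N comes from whichever RHS axis holds it
    assert(out[0] == 6 && out[1] == 4);       // output is [N=6, M=4]
    return 0;
}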
+/** Calculate the output stage shape of a tensor
+ *
* @param[in] input Input tensor info
* @param[in] gemm_3d_depth (Optional) GEMM 3d depth
* @param[in] batch_size_on_z (Optional) True if batch size is on z axis
*
* @return the calculated shape
*/
-inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
+inline TensorShape
+compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
{
ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1);
TensorShape output_shape = input.tensor_shape();
- if(gemm_3d_depth > 1)
+ if (gemm_3d_depth > 1)
{
- if(batch_size_on_z)
+ if (batch_size_on_z)
{
output_shape.shift_right(1);
}
@@ -1001,11 +1166,16 @@ inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned
* @return the calculated shape
*/
inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
- const Coordinates &starts, const Coordinates &ends, const Coordinates &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const Coordinates &strides,
+ int32_t begin_mask,
+ int32_t end_mask,
+ int32_t shrink_axis_mask)
{
using namespace arm_compute::helpers::tensor_transform;
- return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
+ return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask,
+ shrink_axis_mask);
}
/** Calculate the slice output shape of a tensor
@@ -1016,60 +1186,72 @@ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
*
* @return the calculated shape
*/
-inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends)
+inline TensorShape
+compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends)
{
using namespace arm_compute::helpers::tensor_transform;
- return compute_strided_slice_output_shape(input_shape,
- starts, ends, BiStrides(),
- 0, construct_slice_end_mask(ends), 0);
+ return compute_strided_slice_output_shape(input_shape, starts, ends, BiStrides(), 0, construct_slice_end_mask(ends),
+ 0);
}
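
As a worked example of the slice semantics (unit strides, end mask built from the ends), a hypothetical fragment under the same includes as the sketch above:

const TensorShape in{10u, 20u};
const Coordinates starts(2, 4);
const Coordinates ends(8, 14);
const TensorShape out = compute_slice_shape(in, starts, ends); // (8-2, 14-4) = (6, 10)
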
/** Calculate the batch to space output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] block_x Block shape x value
- * @param[in] block_y Block shape y value
+ * @param[in] data_layout Data layout
+ * @param[in] input Input tensor shape
+ * @param[in] block_x Block shape x value
+ * @param[in] block_y Block shape y value
+ * @param[in] crop_info Information about how the output shape is cropped after batch to space is performed
*
* @return the calculated shape
*/
-inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y)
+inline TensorShape compute_batch_to_space_shape(
+ DataLayout data_layout, const TensorShape &input, int block_x, int block_y, const CropInfo &crop_info = CropInfo{})
{
- ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0);
+ ARM_COMPUTE_ERROR_ON(block_x < 1 || block_y < 1);
- const DataLayout data_layout = input->data_layout();
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+
+ TensorShape output_shape{input};
+
+ unsigned int new_width = input[idx_width] * static_cast<unsigned int>(block_x);
+ unsigned int new_height = input[idx_height] * static_cast<unsigned int>(block_y);
+ const unsigned int width_crop = crop_info.left + crop_info.right;
+ const unsigned int height_crop = crop_info.top + crop_info.bottom;
+ ARM_COMPUTE_ERROR_ON(new_width <= width_crop);
+ ARM_COMPUTE_ERROR_ON(new_height <= height_crop);
+ new_width -= width_crop;
+ new_height -= height_crop;
- TensorShape output_shape{ input->tensor_shape() };
- output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x);
- output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y);
- output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y));
+ output_shape.set(idx_width, new_width);
+ output_shape.set(idx_height, new_height);
+ output_shape.set(idx_batch, input[idx_batch] / (block_x * block_y));
return output_shape;
}
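
A hypothetical NHWC example of the new signature (dimension order C, W, H, N), assuming CropInfo takes left/right/top/bottom in that order:

const TensorShape in{3u, 4u, 4u, 8u}; // C=3, W=4, H=4, N=8
const CropInfo    crop{1, 0, 1, 0};   // assumed order: left, right, top, bottom
const TensorShape out = compute_batch_to_space_shape(DataLayout::NHWC, in, 2, 2, crop);
// W: 4*2 - (1+0) = 7, H: 4*2 - (1+0) = 7, N: 8/(2*2) = 2 -> (3, 7, 7, 2)
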
/** Calculate the depth to space output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] block Block shape value
+ * @param[in] input_shape Input tensor shape
+ * @param[in] data_layout Operation data layout
+ * @param[in] block Block shape value
*
* @return the calculated shape
*/
-inline TensorShape compute_depth_to_space_shape(const ITensorInfo *input, int block)
+inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, DataLayout data_layout, int block)
{
ARM_COMPUTE_ERROR_ON(block < 2);
- const DataLayout data_layout = input->data_layout();
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- TensorShape output_shape{ input->tensor_shape() };
- output_shape.set(idx_width, input->dimension(idx_width) * block);
- output_shape.set(idx_height, input->dimension(idx_height) * block);
- output_shape.set(idx_channel, input->dimension(idx_channel) / (block * block));
+ TensorShape output_shape{input_shape};
+ output_shape.set(idx_width, input_shape[idx_width] * block);
+ output_shape.set(idx_height, input_shape[idx_height] * block);
+ output_shape.set(idx_channel, input_shape[idx_channel] / (block * block));
return output_shape;
}
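
A hypothetical NHWC example of the reworked helper:

const TensorShape in{16u, 5u, 5u, 1u}; // C=16, W=5, H=5, N=1
const TensorShape out = compute_depth_to_space_shape(in, DataLayout::NHWC, 2);
// W: 5*2 = 10, H: 5*2 = 10, C: 16/(2*2) = 4 -> (4, 10, 10, 1)
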
@@ -1087,10 +1269,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax
TensorShape empty_shape;
empty_shape.set(0, 0);
- TensorShape out_shape{ input->tensor_shape() };
+ TensorShape out_shape{input->tensor_shape()};
// Return empty shape if axis is invalid
- if(axis > input->tensor_shape().num_dimensions())
+ if (axis > input->tensor_shape().num_dimensions())
{
return empty_shape;
}
@@ -1098,7 +1280,7 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax
size_t axis_size = out_shape[axis];
// Return empty shape if num_split is not valid
- if(axis_size % num_splits)
+ if (axis_size % num_splits)
{
return empty_shape;
}
@@ -1117,18 +1299,22 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax
*
* @return the calculated shape
*/
-inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const int block_x, const int block_y, const Size2D &padding_left, const Size2D &padding_right)
+inline TensorShape compute_space_to_batch_shape(
+ const ITensorInfo *input, int block_x, int block_y, const Size2D &padding_left, const Size2D &padding_right)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
const DataLayout data_layout = input->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
- output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x + padding_left.x() + padding_right.x());
- output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y + padding_left.y() + padding_right.y());
- output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y));
+ ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) % block_x != 0);
+ ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) % block_y != 0);
+
+ output_shape.set(idx_width, (input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) / block_x);
+ output_shape.set(idx_height, (input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) / block_y);
+ output_shape.set(idx_batch, input->tensor_shape()[idx_batch] * block_x * block_y);
return output_shape;
}
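
The corrected arithmetic shrinks the padded spatial extents by the block sizes and grows the batch dimension accordingly. A hypothetical sketch, assuming TensorInfo's (shape, channels, type) constructor and set_data_layout():

TensorInfo src(TensorShape(3u, 4u, 4u, 2u), 1, DataType::F32); // C=3, W=4, H=4, N=2
src.set_data_layout(DataLayout::NHWC);
const TensorShape out = compute_space_to_batch_shape(&src, 2, 2, Size2D(1, 1), Size2D(1, 1));
// W: (4+1+1)/2 = 3, H: (4+1+1)/2 = 3, N: 2*2*2 = 8 -> (3, 3, 3, 8)
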
@@ -1142,16 +1328,16 @@ inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const
*/
inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
const DataLayout data_layout = input->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape);
- output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape);
- output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape));
+ output_shape.set(idx_width, input->tensor_shape()[idx_width] / block_shape);
+ output_shape.set(idx_height, input->tensor_shape()[idx_height] / block_shape);
+ output_shape.set(idx_depth, input->tensor_shape()[idx_depth] * (block_shape * block_shape));
return output_shape;
}
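
The same fix here makes space-to-depth the exact inverse of depth-to-space; a hypothetical sketch under the same assumptions as above:

TensorInfo src(TensorShape(4u, 10u, 10u, 1u), 1, DataType::F32); // C=4, W=10, H=10, N=1
src.set_data_layout(DataLayout::NHWC);
const TensorShape out = compute_space_to_depth_shape(&src, 2);
// W: 10/2 = 5, H: 10/2 = 5, C: 4*(2*2) = 16 -> (16, 5, 5, 1)
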
@@ -1187,7 +1373,7 @@ inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const Prior
inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding)
{
TensorShape padded_shape = input_shape;
- for(size_t dim = 0; dim < padding.size(); ++dim)
+ for (size_t dim = 0; dim < padding.size(); ++dim)
{
const auto &padding_pair = padding[dim];
const uint32_t shape_on_index = (padded_shape.num_dimensions() <= dim) ? 1 : input_shape[dim];
@@ -1206,7 +1392,7 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa
inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples)
{
TensorShape tiled_shape = input_shape;
- for(size_t dim = 0; dim < multiples.size(); ++dim)
+ for (size_t dim = 0; dim < multiples.size(); ++dim)
{
tiled_shape.set(dim, input_shape[dim] * multiples[dim]);
}
@@ -1223,9 +1409,9 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul
*/
inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true)
{
- TensorShape output_shape{ input };
+ TensorShape output_shape{input};
- if(!keep_dims)
+ if (!keep_dims)
{
output_shape.remove_dimension(axis);
}
@@ -1318,14 +1504,14 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si
#if defined(ARM_COMPUTE_ASSERTS_ENABLED)
// All dimensions must match except the axis one
- for(unsigned int i = 0; i < MAX_DIMS; ++i)
+ for (unsigned int i = 0; i < MAX_DIMS; ++i)
{
- if(i == axis)
+ if (i == axis)
{
continue;
}
- for(const auto &tensor : input)
+ for (const auto &tensor : input)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
const TensorShape shape = extract_shape(tensor);
@@ -1336,7 +1522,7 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si
// Calculate output shape
size_t new_size = 0;
- for(const auto &tensor : input)
+ for (const auto &tensor : input)
{
const TensorShape shape = extract_shape(tensor);
new_size += shape[axis];
@@ -1359,14 +1545,14 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis,
ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions());
ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4);
- TensorShape shape_out{ a.tensor_shape() };
+ TensorShape shape_out{a.tensor_shape()};
shape_out.set(axis, num_tensors);
unsigned int i_shift = 0;
- for(unsigned int i = 0; i < a.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < a.num_dimensions(); ++i)
{
- if(i == axis)
+ if (i == axis)
{
i_shift++;
}
@@ -1376,18 +1562,177 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis,
return shape_out;
}
-inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis)
+/** Calculate the output shape of a 3D convolution
+ *
+ * @param[in] src         Input tensor shape
+ * @param[in] weights     Weights tensor shape
+ * @param[in] conv3d_info 3D convolution parameters object
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info)
+{
+ // Weight tensor shape indices (D H W Cin Cout)
+ constexpr unsigned int weights_depth_dim = 4u;
+ constexpr unsigned int weights_height_dim = 3u;
+ constexpr unsigned int weights_width_dim = 2u;
+ constexpr unsigned int weights_CHout_dim = 0u;
+
+ // Source/Destination Tensor shape indices (N D H W C)
+ constexpr unsigned int batch_dim = 4u;
+ constexpr unsigned int depth_dim = 3u;
+ constexpr unsigned int height_dim = 2u;
+ constexpr unsigned int width_dim = 1u;
+ constexpr unsigned int channel_dim = 0u;
+
+ TensorShape output_shape{src};
+ const size_t pad_left = conv3d_info.padding.left;
+ const size_t pad_right = conv3d_info.padding.right;
+ const size_t pad_top = conv3d_info.padding.top;
+ const size_t pad_bottom = conv3d_info.padding.bottom;
+ const size_t pad_front = conv3d_info.padding.front;
+ const size_t pad_back = conv3d_info.padding.back;
+ const size_t dilation_x = conv3d_info.dilation.width;
+ const size_t dilation_y = conv3d_info.dilation.height;
+ const size_t dilation_z = conv3d_info.dilation.depth;
+ const size_t stride_x = conv3d_info.stride.x();
+ const size_t stride_y = conv3d_info.stride.y();
+ const size_t stride_z = conv3d_info.stride.z();
+
+ int output_width_size = 0;
+ int output_height_size = 0;
+ int output_depth_size = 0;
+
+ switch (conv3d_info.round_type)
+ {
+ case DimensionRoundingType::FLOOR:
+ output_width_size =
+ static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right -
+ (dilation_x * (weights[weights_width_dim] - 1) + 1)) /
+ stride_x) +
+ 1));
+ output_height_size =
+ static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom -
+ (dilation_y * (weights[weights_height_dim] - 1) + 1)) /
+ stride_y) +
+ 1));
+ output_depth_size =
+ static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back -
+ (dilation_z * (weights[weights_depth_dim] - 1) + 1)) /
+ stride_z) +
+ 1));
+ break;
+ case DimensionRoundingType::CEIL:
+ output_width_size =
+ static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right -
+ (dilation_x * (weights[weights_width_dim] - 1) + 1)) /
+ stride_x) +
+ 1));
+ output_height_size =
+ static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom -
+ (dilation_y * (weights[weights_height_dim] - 1) + 1)) /
+ stride_y) +
+ 1));
+ output_depth_size =
+ static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back -
+ (dilation_z * (weights[weights_depth_dim] - 1) + 1)) /
+ stride_z) +
+ 1));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported rounding type");
+ }
+
+ output_shape.set(batch_dim, src[batch_dim]);
+ output_shape.set(width_dim, output_width_size);
+ output_shape.set(height_dim, output_height_size);
+ output_shape.set(depth_dim, output_depth_size);
+ output_shape.set(channel_dim, weights[weights_CHout_dim]);
+ return output_shape;
+}
+
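Both rounding branches evaluate the standard dilated-convolution size formula per spatial axis:

    out = round((in + pad_before + pad_after - (dilation * (kernel - 1) + 1)) / stride) + 1

For example, with FLOOR rounding, in = 16, pads 1 + 1, kernel = 3, dilation = 1 and stride = 2 give floor((16 + 2 - 3) / 2) + 1 = floor(7.5) + 1 = 8.
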
+/** Calculate the 3D pooling output shape of a tensor
+ *
+ * @param[in] src         Input tensor shape
+ * @param[in] pool3d_info 3D pooling layer info
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerInfo pool3d_info)
+{
+ TensorShape output_shape{src};
+
+ const auto data_layout = DataLayout::NDHWC;
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::DEPTH);
+ const int pool_size_width = pool3d_info.is_global_pooling ? src[idx_width] : pool3d_info.pool_size.width;
+ const int pool_size_height = pool3d_info.is_global_pooling ? src[idx_height] : pool3d_info.pool_size.height;
+ const int pool_size_depth = pool3d_info.is_global_pooling ? src[idx_depth] : pool3d_info.pool_size.depth;
+ int output_width = 0;
+ int output_height = 0;
+ int output_depth = 0;
+
+ std::tie(output_width, output_height, output_depth) =
+ scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height,
+ pool_size_depth, pool3d_info);
+
+ ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1),
+ "Calculated output dimension size is invalid");
+
+ output_shape.set(idx_width, static_cast<size_t>(output_width));
+ output_shape.set(idx_height, static_cast<size_t>(output_height));
+ output_shape.set(idx_depth, static_cast<size_t>(output_depth));
+
+ return output_shape;
+}
+
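In the global-pooling case the window spans the full (W, H, D) extent, so with the default stride and padding each spatial output collapses to 1: a hypothetical NDHWC source of (C=8, W=7, H=7, D=4, N=1) yields (8, 1, 1, 1, 1). Otherwise scaled_3d_dimensions_signed applies the usual stride/padding/rounding arithmetic per axis, and the assert rejects any non-positive result.
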
+/** Calculate the gather output shape of a tensor
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] indices_shape Indices tensor shape. Only 2D and 3D indices are supported
+ * @param[in] actual_axis   Axis to be used in the computation
+ *
+ * @note Given an input shape (X,Y,Z), an indices shape (W,O,P) and an axis of 1,
+ *       the output shape is computed by replacing that axis of the input shape with
+ *       the indices shape, giving (X,W,O,P,Z)
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis)
{
- ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 1);
- ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4);
- ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions());
+ const auto input_num_dims = input_shape.num_dimensions();
+ const auto indices_num_dims = indices_shape.num_dimensions();
+
+ ARM_COMPUTE_ERROR_ON(actual_axis >= input_num_dims);
+ ARM_COMPUTE_ERROR_ON(input_num_dims + indices_num_dims - 1 > Coordinates::num_max_dimensions);
+
+ TensorShape output_shape;
+ size_t dim_no = 0;
+
+ for (; dim_no < actual_axis; ++dim_no)
+ {
+ output_shape.set(dim_no, input_shape[dim_no]);
+ }
+
+ for (; dim_no < actual_axis + indices_num_dims; ++dim_no)
+ {
+ output_shape.set(dim_no, indices_shape[dim_no - actual_axis]);
+ }
+
+ for (; dim_no < input_num_dims + indices_num_dims - 1; ++dim_no)
+ {
+ output_shape.set(dim_no, input_shape[dim_no + 1 - indices_num_dims]);
+ }
- TensorShape output_shape = input_shape;
- output_shape[actual_axis] = indices_shape[0];
+ ARM_COMPUTE_ERROR_ON(input_shape.total_size() * indices_shape.total_size() !=
+ output_shape.total_size() * input_shape[actual_axis]);
return output_shape;
}
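
A hypothetical example matching the @note above:

const TensorShape input{5u, 6u, 7u};   // (X=5, Y=6, Z=7)
const TensorShape indices{2u, 3u, 4u}; // (W=2, O=3, P=4)
const TensorShape out = compute_gather_shape(input, indices, 1u); // (5, 2, 3, 4, 7)
// Consistency check from the assert: 210 * 24 == 840 * 6
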
} // namespace shape_calculator
} // namespace misc
} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H */
+#endif // ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h
index 1cbdbfe16f..944fcb95f9 100644
--- a/arm_compute/core/utils/misc/Traits.h
+++ b/arm_compute/core/utils/misc/Traits.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,8 @@
#ifndef ARM_COMPUTE_UTILS_TRAITS_TRAITS_H
#define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H
+#include "arm_compute/core/Types.h"
+
#include <type_traits>
namespace arm_compute
diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h
index b2bb63f5c8..22f10d74cc 100644
--- a/arm_compute/core/utils/misc/Utility.h
+++ b/arm_compute/core/utils/misc/Utility.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,11 @@
#ifndef ARM_COMPUTE_MISC_UTILITY_H
#define ARM_COMPUTE_MISC_UTILITY_H
+#include "arm_compute/core/Error.h"
+
#include <algorithm>
#include <array>
+#include <cstdint>
#include <limits>
#include <numeric>
#include <vector>
@@ -41,7 +44,7 @@ struct index_sequence
};
template <std::size_t N, std::size_t... S>
-struct index_sequence_generator : index_sequence_generator < N - 1, N - 1, S... >
+struct index_sequence_generator : index_sequence_generator<N - 1, N - 1, S...>
{
};
@@ -55,17 +58,17 @@ template <std::size_t N>
using index_sequence_t = typename index_sequence_generator<N>::type;
template <typename T, std::size_t N, T val, T... vals>
-struct generate_array : generate_array < T, N - 1, val, val, vals... >
+struct generate_array : generate_array<T, N - 1, val, val, vals...>
{
};
template <typename T, T val, T... vals>
struct generate_array<T, 0, val, vals...>
{
- static constexpr std::array<T, sizeof...(vals)> value{ vals... };
+ static constexpr std::array<T, sizeof...(vals)> value{vals...};
};
-template <typename T, T val, T... vals>
+template <typename T, T val, T... vals>
constexpr std::array<T, sizeof...(vals)> generate_array<T, 0, val, vals...>::value;
/** @endcond */
@@ -76,7 +79,7 @@ template <std::size_t... S,
typename T = std::array<typename std::iterator_traits<Iterator>::value_type, sizeof...(S)>>
T make_array(Iterator first, index_sequence<S...>)
{
- return T{ { first[S]... } };
+ return T{{first[S]...}};
}
} // namespace detail
@@ -84,7 +87,7 @@ template <std::size_t N, typename Iterator>
std::array<typename std::iterator_traits<Iterator>::value_type, N> make_array(Iterator first, Iterator last)
{
ARM_COMPUTE_UNUSED(last);
- return detail::make_array(first, index_sequence_t<N> {});
+ return detail::make_array(first, index_sequence_t<N>{});
}
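
A hypothetical use: materialize the first N elements of a range into a fixed-size array.

const std::vector<int> src{1, 2, 3, 4, 5};
const std::array<int, 3> arr = arm_compute::utility::make_array<3>(src.begin(), src.end()); // {1, 2, 3}
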
/** Performs clamping among a lower and upper value.
@@ -116,7 +119,7 @@ inline void for_each(F &&)
* @param[in] args Remaining arguments
*/
template <typename F, typename T, typename... Ts>
-inline void for_each(F &&func, T &&arg, Ts &&... args)
+inline void for_each(F &&func, T &&arg, Ts &&...args)
{
func(std::forward<T>(arg));
for_each(std::forward<F>(func), std::forward<Ts>(args)...);
@@ -140,9 +143,11 @@ inline T &&foldl(F &&, T &&value)
* @param[in] values Remaining arguments
*/
template <typename F, typename T, typename U, typename... Us>
-inline auto foldl(F &&func, T &&initial, U &&value, Us &&... values) -> decltype(func(std::forward<T>(initial), std::forward<U>(value)))
+inline auto foldl(F &&func, T &&initial, U &&value, Us &&...values)
+ -> decltype(func(std::forward<T>(initial), std::forward<U>(value)))
{
- return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...);
+ return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)),
+ std::forward<Us>(values)...);
}
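
A hypothetical use of the fold, with <functional> providing std::plus:

const int sum = arm_compute::utility::foldl(std::plus<int>{}, 0, 1, 2, 3); // ((0+1)+2)+3 = 6
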
/** Perform an index sort of a given vector.
@@ -157,11 +162,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v)
std::vector<size_t> idx(v.size());
std::iota(idx.begin(), idx.end(), 0);
- std::sort(idx.begin(), idx.end(),
- [&v](size_t i1, size_t i2)
- {
- return v[i1] < v[i2];
- });
+ std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; });
return idx;
}
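
For example, a hypothetical call:

const std::vector<int> v{30, 10, 20};
const auto idx = arm_compute::utility::sort_indices(v); // {1, 2, 0}: v[1] <= v[2] <= v[0]
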
@@ -175,7 +176,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v)
*/
inline bool endswith(const std::string &str, const std::string &suffix)
{
- if(str.size() < suffix.size())
+ if (str.size() < suffix.size())
{
return false;
}
@@ -202,12 +203,28 @@ inline bool check_aligned(void *ptr, const size_t alignment)
*/
inline std::string tolower(std::string string)
{
- std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c)
- {
- return std::tolower(c);
- });
+ std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) { return std::tolower(c); });
return string;
}
+
+/** Get an environment variable as a string
+ *
+ * @note Returns an empty string on bare-metal builds
+ *
+ * @param[in] env_name Name of the environment variable to retrieve
+ *
+ * @return The environment variable's content, or an empty string if the variable is undefined or the build is bare-metal
+ */
+inline std::string getenv(const std::string &env_name)
+{
+#ifdef BARE_METAL
+ ARM_COMPUTE_UNUSED(env_name);
+ return std::string{};
+#else // BARE_METAL
+ const auto env_chr = std::getenv(env_name.c_str());
+ return env_chr == nullptr ? std::string{} : std::string{env_chr};
+#endif // BARE_METAL
+}
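
A hypothetical caller (the variable name is illustrative only):

const std::string cfg = arm_compute::utility::getenv("ACL_EXAMPLE_VAR");
if (cfg.empty())
{
    // Unset, empty, or running on bare-metal.
}
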
} // namespace utility
} // namespace arm_compute
#endif /* ARM_COMPUTE_MISC_UTILITY_H */
diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h
index 4ef49476b2..2324fe1838 100644
--- a/arm_compute/core/utils/quantization/AsymmHelpers.h
+++ b/arm_compute/core/utils/quantization/AsymmHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,10 @@ namespace quantization
*
* @return a status
*/
-Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon = false);
+Status calculate_quantized_multiplier(float multiplier,
+ int32_t *quant_multiplier,
+ int32_t *shift,
+ bool ignore_epsilon = false);
/** Calculate quantized representation of multiplier with value less than one.
*
* @param[in] multiplier Real multiplier.
@@ -51,7 +54,10 @@ Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplie
*
* @return a status
*/
-Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon = false);
+Status calculate_quantized_multiplier_less_than_one(float multiplier,
+ int32_t *quant_multiplier,
+ int32_t *right_shift,
+ bool ignore_epsilon = false);
/** Calculate quantized representation of multiplier having value greater than one.
*
* @param[in] multiplier Real multiplier.
@@ -60,7 +66,8 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *q
*
* @return a status
*/
-Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift);
+Status
+calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift);
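
As background (not part of the patch), these helpers follow the usual gemmlowp-style fixed-point decomposition: a real multiplier M is approximated as

    M ~= (quant_multiplier / 2^31) * 2^shift, with quant_multiplier in [2^30, 2^31).

For example, M = 1.5 normalizes to 0.75 * 2^1, giving quant_multiplier = round(0.75 * 2^31) = 1610612736 and shift = 1; the less-than-one variant returns the magnitude of the negative shift as a right shift, e.g. M = 0.25 gives quant_multiplier = 2^30 and right_shift = 1.
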
/** Calculate quantized representation of per-channel multipliers
*
@@ -71,9 +78,9 @@ Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t
*
* @return a status
*/
-Status calculate_quantized_multipliers(const QuantizationInfo &iq_info,
- const QuantizationInfo &wq_info,
- const QuantizationInfo &oq_info,
+Status calculate_quantized_multipliers(const QuantizationInfo &iq_info,
+ const QuantizationInfo &wq_info,
+ const QuantizationInfo &oq_info,
GEMMLowpOutputStageInfo &stage_info);
/** Get minimum and maximum values for the input quantized data type
@@ -81,6 +88,7 @@ Status calculate_quantized_multipliers(const QuantizationInfo &iq_info,
* @return min and max values for the quantized data type
*/
std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_type);
+
/** Compute quantized per-channel multipliers and shifts. As many multipliers
* and shifts as output channels are computed. If weights are not quantized
* per-channel, multipliers and shifts will end up being the same for each
@@ -89,16 +97,12 @@ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_ty
* @param[in] input Input tensor info.
* @param[in] weights Weights tensor info.
* @param[in] output Output tensor info.
- * @param[in] idx_ofms Dimension index to get OFMs from the weights tensor.
* @param[out] output_multipliers_ptr Pointer to the buffer where to store per-channel multipliers.
* @param[out] output_shifts_ptr Pointer to the buffer where to store per-channel shifts.
- *
- * @return min and max values for the quantized data type
*/
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input,
const ITensorInfo *weights,
const ITensorInfo *output,
- unsigned int idx_ofms,
int32_t *output_multipliers_ptr,
int32_t *output_shifts_ptr);
@@ -150,7 +154,10 @@ int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v);
* @param[out] output_shift Shift for inverse square root
*
*/
-void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift);
+void get_invsqrt_quantized_multiplier_exp(int32_t input,
+ int32_t reverse_shift,
+ int32_t &output_inv_sqrt,
+ int32_t &output_shift);
} // namespace quantization
} // namespace arm_compute