aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sang-Hoon Park <sang-hoon.park@arm.com> 2020-10-21 15:58:54 +0100
committer: Sang-Hoon Park <sang-hoon.park@arm.com> 2020-11-07 08:07:22 +0000
commit: bef7fa27b0d231a8649952f60808132d109b6345 (patch)
tree: 7543c66a473d90e28b4860986fad77afa5115043
parent: b9531540dadce8331a703c32456f3c9defdfefa9 (diff)
download: ComputeLibrary-bef7fa27b0d231a8649952f60808132d109b6345.tar.gz
COMPMID-3639: (3RDPARTY_UPDATE) Move CL kernels to src
Change-Id: I10d27db788e5086adae1841e3e2441cd9b76ef84
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4310
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
m---------3rdparty0
-rw-r--r--arm_compute/core/CL/CLKernels.h163
-rw-r--r--arm_compute/core/CL/CLTypes.h35
-rw-r--r--arm_compute/runtime/CL/ICLOperator.h4
-rw-r--r--arm_compute/runtime/CL/ICLSimpleFunction.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLAbsoluteDifference.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLAccumulate.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLActivationLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h26
-rw-r--r--arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h20
-rw-r--r--arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h20
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseAnd.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseNot.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseOr.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseXor.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLBox3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLCannyEdge.h37
-rw-r--r--arm_compute/runtime/CL/functions/CLCast.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelCombine.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelExtract.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLColorConvert.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLComparison.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLComputeAllAnchors.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLConcatenateLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLConvolution.h33
-rw-r--r--arm_compute/runtime/CL/functions/CLConvolutionLayer.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLCopy.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLCropResize.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthConvertLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h59
-rw-r--r--arm_compute/runtime/CL/functions/CLDequantizationLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDerivative.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLDilate.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h18
-rw-r--r--arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLElementwiseOperations.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLEqualizeHistogram.h29
-rw-r--r--arm_compute/runtime/CL/functions/CLErode.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT1D.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT2D.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLFastCorners.h29
-rw-r--r--arm_compute/runtime/CL/functions/CLFill.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFillBorder.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFlattenLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLFloor.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMM.h102
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h25
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h22
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h35
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h7
-rw-r--r--arm_compute/runtime/CL/functions/CLGather.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussian3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussian5x5.h26
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussianPyramid.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h39
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGDescriptor.h23
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGDetector.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGGradient.h16
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGMultiDetection.h36
-rw-r--r--arm_compute/runtime/CL/functions/CLHarrisCorners.h40
-rw-r--r--arm_compute/runtime/CL/functions/CLHistogram.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLIntegralImage.h16
-rw-r--r--arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h22
-rw-r--r--arm_compute/runtime/CL/functions/CLLSTMLayer.h184
-rw-r--r--arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h30
-rw-r--r--arm_compute/runtime/CL/functions/CLMagnitude.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h21
-rw-r--r--arm_compute/runtime/CL/functions/CLMeanStdDev.h39
-rw-r--r--arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLMedian3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLMinMaxLocation.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLNonLinearFilter.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLNormalizationLayer.h27
-rw-r--r--arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLOpticalFlow.h41
-rw-r--r--arm_compute/runtime/CL/functions/CLPReluLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLPadLayer.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLPermute.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLPhase.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLPoolingLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLPriorBoxLayer.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLQLSTMLayer.h167
-rw-r--r--arm_compute/runtime/CL/functions/CLQuantizationLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLRNNLayer.h30
-rw-r--r--arm_compute/runtime/CL/functions/CLROIAlignLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLROIPoolingLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLRange.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLReductionOperation.h33
-rw-r--r--arm_compute/runtime/CL/functions/CLRemap.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLReorgLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLReshapeLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLReverse.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLScale.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLScharr3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLSelect.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLSlice.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel3x3.h9
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel5x5.h24
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel7x7.h24
-rw-r--r--arm_compute/runtime/CL/functions/CLSoftmaxLayer.h37
-rw-r--r--arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLStackLayer.h21
-rw-r--r--arm_compute/runtime/CL/functions/CLTableLookup.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLThreshold.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLTile.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLTranspose.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLUpsampleLayer.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLWarpAffine.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLWarpPerspective.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLWinogradInputTransform.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLYOLOLayer.h5
-rw-r--r--arm_compute/runtime/IOperator.h3
-rw-r--r--docs/04_adding_operator.dox10
-rw-r--r--docs/ComputeLibrary.dir10
-rw-r--r--examples/cl_cache.cpp6
-rw-r--r--examples/cl_convolution.cpp5
-rw-r--r--examples/cl_events.cpp4
-rw-r--r--examples/cl_sgemm.cpp4
-rw-r--r--examples/gemm_tuner/cl_gemm_native.cpp3
-rw-r--r--examples/gemm_tuner/cl_gemm_reshaped.cpp5
-rw-r--r--examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp3
-rw-r--r--examples/neoncl_scale_median_gaussian.cpp3
-rw-r--r--src/core/CL/CLKernels.h163
-rw-r--r--src/core/CL/CLTracePoint.cpp2
-rw-r--r--src/core/CL/CLValidate.h1
-rw-r--r--src/core/CL/ICLKernel.cpp2
-rw-r--r--src/core/CL/ICLKernel.h (renamed from arm_compute/core/CL/ICLKernel.h)0
-rw-r--r--src/core/CL/ICLSimple2DKernel.cpp2
-rw-r--r--src/core/CL/ICLSimple2DKernel.h (renamed from arm_compute/core/CL/ICLSimple2DKernel.h)4
-rw-r--r--src/core/CL/ICLSimple3DKernel.cpp4
-rw-r--r--src/core/CL/ICLSimple3DKernel.h (renamed from arm_compute/core/CL/ICLSimple3DKernel.h)4
-rw-r--r--src/core/CL/ICLSimpleKernel.cpp3
-rw-r--r--src/core/CL/ICLSimpleKernel.h (renamed from arm_compute/core/CL/ICLSimpleKernel.h)4
-rw-r--r--src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLAbsoluteDifferenceKernel.h (renamed from arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h)2
-rw-r--r--src/core/CL/kernels/CLAccumulateKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLAccumulateKernel.h (renamed from arm_compute/core/CL/kernels/CLAccumulateKernel.h)2
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLActivationLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLArgMinMaxLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBatchConcatenateLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBatchNormalizationLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBatchToSpaceLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBitwiseAndKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBitwiseAndKernel.h (renamed from arm_compute/core/CL/kernels/CLBitwiseAndKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBitwiseNotKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBitwiseNotKernel.h (renamed from arm_compute/core/CL/kernels/CLBitwiseNotKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBitwiseOrKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBitwiseOrKernel.h (renamed from arm_compute/core/CL/kernels/CLBitwiseOrKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBitwiseXorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBitwiseXorKernel.h (renamed from arm_compute/core/CL/kernels/CLBitwiseXorKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBoundingBoxTransformKernel.h (renamed from arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h)2
-rw-r--r--src/core/CL/kernels/CLBox3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBox3x3Kernel.h (renamed from arm_compute/core/CL/kernels/CLBox3x3Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLCannyEdgeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLCannyEdgeKernel.h (renamed from arm_compute/core/CL/kernels/CLCannyEdgeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLChannelCombineKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLChannelCombineKernel.h (renamed from arm_compute/core/CL/kernels/CLChannelCombineKernel.h)2
-rw-r--r--src/core/CL/kernels/CLChannelExtractKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLChannelExtractKernel.h (renamed from arm_compute/core/CL/kernels/CLChannelExtractKernel.h)2
-rw-r--r--src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLChannelShuffleLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLCol2ImKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLCol2ImKernel.h (renamed from arm_compute/core/CL/kernels/CLCol2ImKernel.h)2
-rw-r--r--src/core/CL/kernels/CLColorConvertKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLColorConvertKernel.h (renamed from arm_compute/core/CL/kernels/CLColorConvertKernel.h)2
-rw-r--r--src/core/CL/kernels/CLComparisonKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLComparisonKernel.h (renamed from arm_compute/core/CL/kernels/CLComparisonKernel.h)2
-rw-r--r--src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h (renamed from arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h)2
-rw-r--r--src/core/CL/kernels/CLConvolutionKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLConvolutionKernel.h (renamed from arm_compute/core/CL/kernels/CLConvolutionKernel.h)2
-rw-r--r--src/core/CL/kernels/CLCopyKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLCopyKernel.h (renamed from arm_compute/core/CL/kernels/CLCopyKernel.h)2
-rw-r--r--src/core/CL/kernels/CLCropKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLCropKernel.h (renamed from arm_compute/core/CL/kernels/CLCropKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h (renamed from arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h (renamed from arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthConcatenateLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDepthConvertLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthConvertLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthToSpaceLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h (renamed from arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h (renamed from arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h (renamed from arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h (renamed from arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDerivativeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDerivativeKernel.h (renamed from arm_compute/core/CL/kernels/CLDerivativeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDilateKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDilateKernel.h (renamed from arm_compute/core/CL/kernels/CLDilateKernel.h)2
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h)4
-rw-r--r--src/core/CL/kernels/CLElementwiseOperationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLElementwiseOperationKernel.h (renamed from arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h)2
-rw-r--r--src/core/CL/kernels/CLErodeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLErodeKernel.h (renamed from arm_compute/core/CL/kernels/CLErodeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFFTDigitReverseKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFFTDigitReverseKernel.h (renamed from arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFFTRadixStageKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFFTRadixStageKernel.h (renamed from arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFFTScaleKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFFTScaleKernel.h (renamed from arm_compute/core/CL/kernels/CLFFTScaleKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFastCornersKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFastCornersKernel.h (renamed from arm_compute/core/CL/kernels/CLFastCornersKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFillBorderKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFillBorderKernel.h (renamed from arm_compute/core/CL/kernels/CLFillBorderKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFlattenLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFlattenLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLFlattenLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFloorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFloorKernel.h (renamed from arm_compute/core/CL/kernels/CLFloorKernel.h)2
-rw-r--r--src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFuseBatchNormalizationKernel.h (renamed from arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp16
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h)5
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpReductionKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h (renamed from arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h)4
-rw-r--r--src/core/CL/kernels/CLGatherKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGatherKernel.h (renamed from arm_compute/core/CL/kernels/CLGatherKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGaussian3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGaussian3x3Kernel.h (renamed from arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLGaussian5x5Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGaussian5x5Kernel.h (renamed from arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLGaussianPyramidKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGaussianPyramidKernel.h (renamed from arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h)2
-rw-r--r--src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGenerateProposalsLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLHOGDescriptorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLHOGDescriptorKernel.h (renamed from arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h)2
-rw-r--r--src/core/CL/kernels/CLHOGDetectorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLHOGDetectorKernel.h (renamed from arm_compute/core/CL/kernels/CLHOGDetectorKernel.h)2
-rw-r--r--src/core/CL/kernels/CLHarrisCornersKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLHarrisCornersKernel.h (renamed from arm_compute/core/CL/kernels/CLHarrisCornersKernel.h)2
-rw-r--r--src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLHeightConcatenateLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLHistogramKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLHistogramKernel.h (renamed from arm_compute/core/CL/kernels/CLHistogramKernel.h)2
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.h (renamed from arm_compute/core/CL/kernels/CLIm2ColKernel.h)2
-rw-r--r--src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLIntegralImageKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLIntegralImageKernel.h (renamed from arm_compute/core/CL/kernels/CLIntegralImageKernel.h)4
-rw-r--r--src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLL2NormalizeLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLLKTrackerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLLKTrackerKernel.h (renamed from arm_compute/core/CL/kernels/CLLKTrackerKernel.h)36
-rw-r--r--src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h (renamed from arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h)2
-rw-r--r--src/core/CL/kernels/CLMagnitudePhaseKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMagnitudePhaseKernel.h (renamed from arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h)2
-rw-r--r--src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLMeanStdDevKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMeanStdDevKernel.h (renamed from arm_compute/core/CL/kernels/CLMeanStdDevKernel.h)2
-rw-r--r--src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h (renamed from arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h)2
-rw-r--r--src/core/CL/kernels/CLMedian3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMedian3x3Kernel.h (renamed from arm_compute/core/CL/kernels/CLMedian3x3Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLMemsetKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMemsetKernel.h (renamed from arm_compute/core/CL/kernels/CLMemsetKernel.h)2
-rw-r--r--src/core/CL/kernels/CLMinMaxLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMinMaxLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLMinMaxLocationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMinMaxLocationKernel.h (renamed from arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h)2
-rw-r--r--src/core/CL/kernels/CLNonLinearFilterKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLNonLinearFilterKernel.h (renamed from arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h)2
-rw-r--r--src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h (renamed from arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLPadLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPadLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLPadLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLPermuteKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPermuteKernel.h (renamed from arm_compute/core/CL/kernels/CLPermuteKernel.h)2
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h (renamed from arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h)2
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLPoolingLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLPriorBoxLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPriorBoxLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h (renamed from arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h)2
-rw-r--r--src/core/CL/kernels/CLQuantizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLQuantizationLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLROIAlignLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLROIAlignLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLROIPoolingLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLROIPoolingLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLRangeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLRangeKernel.h (renamed from arm_compute/core/CL/kernels/CLRangeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.h (renamed from arm_compute/core/CL/kernels/CLReductionOperationKernel.h)2
-rw-r--r--src/core/CL/kernels/CLRemapKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLRemapKernel.h (renamed from arm_compute/core/CL/kernels/CLRemapKernel.h)2
-rw-r--r--src/core/CL/kernels/CLReorgLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLReorgLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLReorgLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLReshapeLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLReshapeLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLReshapeLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLReverseKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLReverseKernel.h (renamed from arm_compute/core/CL/kernels/CLReverseKernel.h)2
-rw-r--r--src/core/CL/kernels/CLScaleKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLScaleKernel.h (renamed from arm_compute/core/CL/kernels/CLScaleKernel.h)2
-rw-r--r--src/core/CL/kernels/CLScharr3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLScharr3x3Kernel.h (renamed from arm_compute/core/CL/kernels/CLScharr3x3Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLSelectKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLSelectKernel.h (renamed from arm_compute/core/CL/kernels/CLSelectKernel.h)2
-rw-r--r--src/core/CL/kernels/CLSobel3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLSobel3x3Kernel.h (renamed from arm_compute/core/CL/kernels/CLSobel3x3Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLSobel5x5Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLSobel5x5Kernel.h (renamed from arm_compute/core/CL/kernels/CLSobel5x5Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLSobel7x7Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLSobel7x7Kernel.h (renamed from arm_compute/core/CL/kernels/CLSobel7x7Kernel.h)2
-rw-r--r--src/core/CL/kernels/CLSoftmaxLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLSoftmaxLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLSpaceToBatchLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h)5
-rw-r--r--src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLSpaceToDepthLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLStackLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLStackLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLStackLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLStridedSliceKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLStridedSliceKernel.h (renamed from arm_compute/core/CL/kernels/CLStridedSliceKernel.h)2
-rw-r--r--src/core/CL/kernels/CLTableLookupKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLTableLookupKernel.h (renamed from arm_compute/core/CL/kernels/CLTableLookupKernel.h)2
-rw-r--r--src/core/CL/kernels/CLThresholdKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLThresholdKernel.h (renamed from arm_compute/core/CL/kernels/CLThresholdKernel.h)2
-rw-r--r--src/core/CL/kernels/CLTileKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLTileKernel.h (renamed from arm_compute/core/CL/kernels/CLTileKernel.h)2
-rw-r--r--src/core/CL/kernels/CLTransposeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLTransposeKernel.h (renamed from arm_compute/core/CL/kernels/CLTransposeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLUpsampleLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLUpsampleLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWarpAffineKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWarpAffineKernel.h (renamed from arm_compute/core/CL/kernels/CLWarpAffineKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWarpPerspectiveKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWarpPerspectiveKernel.h (renamed from arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWeightsReshapeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWeightsReshapeKernel.h (renamed from arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h (renamed from arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h (renamed from arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenateLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLWinogradFilterTransformKernel.h (renamed from arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWinogradInputTransformKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWinogradInputTransformKernel.h (renamed from arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h)2
-rw-r--r--src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWinogradOutputTransformKernel.h (renamed from arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h)2
-rw-r--r--src/core/CL/kernels/CLYOLOLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLYOLOLayerKernel.h (renamed from arm_compute/core/CL/kernels/CLYOLOLayerKernel.h)2
-rw-r--r--src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h (renamed from arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h)2
-rw-r--r--src/graph/backends/CL/CLFunctionsFactory.cpp1
-rw-r--r--src/graph/backends/CL/CLNodeValidator.cpp11
-rw-r--r--src/runtime/CL/CLOperator.cpp2
-rw-r--r--src/runtime/CL/CLScheduler.cpp2
-rw-r--r--src/runtime/CL/CLTuner.cpp2
-rw-r--r--src/runtime/CL/ICLSimpleFunction.cpp11
-rw-r--r--src/runtime/CL/functions/CLAbsoluteDifference.cpp2
-rw-r--r--src/runtime/CL/functions/CLAccumulate.cpp2
-rw-r--r--src/runtime/CL/functions/CLActivationLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLArgMinMaxLayer.cpp22
-rw-r--r--src/runtime/CL/functions/CLBatchNormalizationLayer.cpp14
-rw-r--r--src/runtime/CL/functions/CLBatchToSpaceLayer.cpp16
-rw-r--r--src/runtime/CL/functions/CLBitwiseAnd.cpp2
-rw-r--r--src/runtime/CL/functions/CLBitwiseNot.cpp2
-rw-r--r--src/runtime/CL/functions/CLBitwiseOr.cpp2
-rw-r--r--src/runtime/CL/functions/CLBitwiseXor.cpp2
-rw-r--r--src/runtime/CL/functions/CLBoundingBoxTransform.cpp2
-rw-r--r--src/runtime/CL/functions/CLBox3x3.cpp5
-rw-r--r--src/runtime/CL/functions/CLCannyEdge.cpp30
-rw-r--r--src/runtime/CL/functions/CLCast.cpp2
-rw-r--r--src/runtime/CL/functions/CLChannelCombine.cpp2
-rw-r--r--src/runtime/CL/functions/CLChannelExtract.cpp2
-rw-r--r--src/runtime/CL/functions/CLChannelShuffleLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLColorConvert.cpp2
-rw-r--r--src/runtime/CL/functions/CLComparison.cpp7
-rw-r--r--src/runtime/CL/functions/CLComputeAllAnchors.cpp1
-rw-r--r--src/runtime/CL/functions/CLConcatenateLayer.cpp12
-rw-r--r--src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp2
-rw-r--r--src/runtime/CL/functions/CLConvolution.cpp32
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp3
-rw-r--r--src/runtime/CL/functions/CLCopy.cpp2
-rw-r--r--src/runtime/CL/functions/CLCropResize.cpp6
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp17
-rw-r--r--src/runtime/CL/functions/CLDepthConvertLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLDepthToSpaceLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp52
-rw-r--r--src/runtime/CL/functions/CLDequantizationLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLDerivative.cpp5
-rw-r--r--src/runtime/CL/functions/CLDilate.cpp5
-rw-r--r--src/runtime/CL/functions/CLDirectConvolutionLayer.cpp21
-rw-r--r--src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp6
-rw-r--r--src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLElementwiseOperations.cpp2
-rw-r--r--src/runtime/CL/functions/CLEqualizeHistogram.cpp24
-rw-r--r--src/runtime/CL/functions/CLErode.cpp5
-rw-r--r--src/runtime/CL/functions/CLFFT1D.cpp30
-rw-r--r--src/runtime/CL/functions/CLFFT2D.cpp5
-rw-r--r--src/runtime/CL/functions/CLFFTConvolutionLayer.cpp7
-rw-r--r--src/runtime/CL/functions/CLFastCorners.cpp20
-rw-r--r--src/runtime/CL/functions/CLFill.cpp2
-rw-r--r--src/runtime/CL/functions/CLFillBorder.cpp2
-rw-r--r--src/runtime/CL/functions/CLFlattenLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLFloor.cpp2
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp13
-rw-r--r--src/runtime/CL/functions/CLFuseBatchNormalization.cpp10
-rw-r--r--src/runtime/CL/functions/CLGEMM.cpp142
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp47
-rw-r--r--src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp25
-rw-r--r--src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp80
-rw-r--r--src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp9
-rw-r--r--src/runtime/CL/functions/CLGather.cpp2
-rw-r--r--src/runtime/CL/functions/CLGaussian3x3.cpp5
-rw-r--r--src/runtime/CL/functions/CLGaussian5x5.cpp24
-rw-r--r--src/runtime/CL/functions/CLGaussianPyramid.cpp47
-rw-r--r--src/runtime/CL/functions/CLGenerateProposalsLayer.cpp57
-rw-r--r--src/runtime/CL/functions/CLHOGDescriptor.cpp22
-rw-r--r--src/runtime/CL/functions/CLHOGDetector.cpp11
-rw-r--r--src/runtime/CL/functions/CLHOGGradient.cpp15
-rw-r--r--src/runtime/CL/functions/CLHOGMultiDetection.cpp21
-rw-r--r--src/runtime/CL/functions/CLHarrisCorners.cpp26
-rw-r--r--src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp4
-rw-r--r--src/runtime/CL/functions/CLIntegralImage.cpp16
-rw-r--r--src/runtime/CL/functions/CLL2NormalizeLayer.cpp16
-rw-r--r--src/runtime/CL/functions/CLLSTMLayer.cpp57
-rw-r--r--src/runtime/CL/functions/CLLSTMLayerQuantized.cpp8
-rw-r--r--src/runtime/CL/functions/CLLaplacianPyramid.cpp3
-rw-r--r--src/runtime/CL/functions/CLLaplacianReconstruct.cpp2
-rw-r--r--src/runtime/CL/functions/CLLocallyConnectedLayer.cpp35
-rw-r--r--src/runtime/CL/functions/CLMagnitude.cpp2
-rw-r--r--src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp17
-rw-r--r--src/runtime/CL/functions/CLMeanStdDev.cpp18
-rw-r--r--src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLMedian3x3.cpp5
-rw-r--r--src/runtime/CL/functions/CLMinMaxLocation.cpp17
-rw-r--r--src/runtime/CL/functions/CLNonLinearFilter.cpp5
-rw-r--r--src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp7
-rw-r--r--src/runtime/CL/functions/CLNormalizationLayer.cpp17
-rw-r--r--src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLOpticalFlow.cpp38
-rw-r--r--src/runtime/CL/functions/CLPReluLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLPadLayer.cpp17
-rw-r--r--src/runtime/CL/functions/CLPermute.cpp2
-rw-r--r--src/runtime/CL/functions/CLPhase.cpp2
-rw-r--r--src/runtime/CL/functions/CLPixelWiseMultiplication.cpp15
-rw-r--r--src/runtime/CL/functions/CLPoolingLayer.cpp5
-rw-r--r--src/runtime/CL/functions/CLPriorBoxLayer.cpp4
-rw-r--r--src/runtime/CL/functions/CLQLSTMLayer.cpp91
-rw-r--r--src/runtime/CL/functions/CLQuantizationLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLRNNLayer.cpp24
-rw-r--r--src/runtime/CL/functions/CLROIAlignLayer.cpp3
-rw-r--r--src/runtime/CL/functions/CLROIPoolingLayer.cpp4
-rw-r--r--src/runtime/CL/functions/CLRange.cpp2
-rw-r--r--src/runtime/CL/functions/CLReduceMean.cpp3
-rw-r--r--src/runtime/CL/functions/CLReductionOperation.cpp42
-rw-r--r--src/runtime/CL/functions/CLRemap.cpp5
-rw-r--r--src/runtime/CL/functions/CLReorgLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLReshapeLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLReverse.cpp2
-rw-r--r--src/runtime/CL/functions/CLScale.cpp5
-rw-r--r--src/runtime/CL/functions/CLScharr3x3.cpp5
-rw-r--r--src/runtime/CL/functions/CLSelect.cpp2
-rw-r--r--src/runtime/CL/functions/CLSlice.cpp2
-rw-r--r--src/runtime/CL/functions/CLSobel3x3.cpp7
-rw-r--r--src/runtime/CL/functions/CLSobel5x5.cpp33
-rw-r--r--src/runtime/CL/functions/CLSobel7x7.cpp33
-rw-r--r--src/runtime/CL/functions/CLSoftmaxLayer.cpp30
-rw-r--r--src/runtime/CL/functions/CLSpaceToBatchLayer.cpp21
-rw-r--r--src/runtime/CL/functions/CLSpaceToDepthLayer.cpp10
-rw-r--r--src/runtime/CL/functions/CLStackLayer.cpp11
-rw-r--r--src/runtime/CL/functions/CLStridedSlice.cpp2
-rw-r--r--src/runtime/CL/functions/CLTableLookup.cpp2
-rw-r--r--src/runtime/CL/functions/CLThreshold.cpp2
-rw-r--r--src/runtime/CL/functions/CLTile.cpp2
-rw-r--r--src/runtime/CL/functions/CLTranspose.cpp2
-rw-r--r--src/runtime/CL/functions/CLUpsampleLayer.cpp10
-rw-r--r--src/runtime/CL/functions/CLWarpAffine.cpp5
-rw-r--r--src/runtime/CL/functions/CLWarpPerspective.cpp5
-rw-r--r--src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp23
-rw-r--r--src/runtime/CL/functions/CLWinogradInputTransform.cpp5
-rw-r--r--src/runtime/CL/functions/CLYOLOLayer.cpp2
-rw-r--r--src/runtime/CL/tuners/BifrostTuner.cpp2
-rw-r--r--src/runtime/CL/tuners/MidgardTuner.cpp2
-rw-r--r--tests/CL/Helper.h10
-rw-r--r--tests/benchmark/CL/Scale.cpp3
-rw-r--r--tests/validate_examples/cl_gemm.cpp19
-rw-r--r--tests/validation/CL/ArgMinMax.cpp6
-rw-r--r--tests/validation/CL/BatchNormalizationLayer.cpp22
-rw-r--r--tests/validation/CL/BatchToSpaceLayer.cpp2
-rw-r--r--tests/validation/CL/BoundingBoxTransform.cpp2
-rw-r--r--tests/validation/CL/CannyEdge.cpp5
-rw-r--r--tests/validation/CL/ChannelCombine.cpp2
-rw-r--r--tests/validation/CL/ChannelExtract.cpp2
-rw-r--r--tests/validation/CL/Col2Im.cpp3
-rw-r--r--tests/validation/CL/ConvertFullyConnectedWeights.cpp2
-rw-r--r--tests/validation/CL/CropResize.cpp1
-rw-r--r--tests/validation/CL/DeconvolutionLayer.cpp1
-rw-r--r--tests/validation/CL/DepthToSpaceLayer.cpp2
-rw-r--r--tests/validation/CL/DepthwiseConvolutionLayerNative.cpp2
-rw-r--r--tests/validation/CL/Fill.cpp2
-rw-r--r--tests/validation/CL/FillBorder.cpp4
-rw-r--r--tests/validation/CL/Flatten.cpp2
-rw-r--r--tests/validation/CL/Floor.cpp2
-rw-r--r--tests/validation/CL/FuseBatchNormalization.cpp2
-rw-r--r--tests/validation/CL/GEMM.cpp2
-rw-r--r--tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp2
-rw-r--r--tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp6
-rw-r--r--tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp4
-rw-r--r--tests/validation/CL/GEMMMatrixMultiply.cpp2
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp6
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyNative.cpp2
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp6
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp4
-rw-r--r--tests/validation/CL/GEMMReshapeLHSMatrix.cpp2
-rw-r--r--tests/validation/CL/GEMMReshapeRHSMatrix.cpp2
-rw-r--r--tests/validation/CL/Gather.cpp1
-rw-r--r--tests/validation/CL/GlobalPoolingLayer.cpp2
-rw-r--r--tests/validation/CL/HOGDescriptor.cpp2
-rw-r--r--tests/validation/CL/HOGDetector.cpp2
-rw-r--r--tests/validation/CL/HOGMultiDetection.cpp2
-rw-r--r--tests/validation/CL/Im2Col.cpp3
-rw-r--r--tests/validation/CL/InstanceNormalizationLayer.cpp2
-rw-r--r--tests/validation/CL/L2NormalizeLayer.cpp2
-rw-r--r--tests/validation/CL/LSTMLayerQuantized.cpp3
-rw-r--r--tests/validation/CL/LogSoftmaxLayer.cpp1
-rw-r--r--tests/validation/CL/MeanStdDevNormalizationLayer.cpp2
-rw-r--r--tests/validation/CL/NormalizationLayer.cpp2
-rw-r--r--tests/validation/CL/OpticalFlow.cpp2
-rw-r--r--tests/validation/CL/PriorBoxLayer.cpp2
-rw-r--r--tests/validation/CL/QLSTMLayerNormalization.cpp2
-rw-r--r--tests/validation/CL/RNNLayer.cpp2
-rw-r--r--tests/validation/CL/Range.cpp2
-rw-r--r--tests/validation/CL/ReduceMean.cpp1
-rw-r--r--tests/validation/CL/Reverse.cpp2
-rw-r--r--tests/validation/CL/SoftmaxLayer.cpp1
-rw-r--r--tests/validation/CL/SpaceToBatchLayer.cpp2
-rw-r--r--tests/validation/CL/SpaceToDepthLayer.cpp2
-rw-r--r--tests/validation/CL/TableLookup.cpp2
-rw-r--r--tests/validation/CL/Tile.cpp2
-rw-r--r--tests/validation/CL/UNIT/DynamicTensor.cpp6
-rw-r--r--tests/validation/CL/UNIT/Tuner.cpp2
-rw-r--r--tests/validation/CL/UNIT/WeightsRetention.cpp12
-rw-r--r--tests/validation/CL/WeightsReshape.cpp2
-rw-r--r--tests/validation/CL/Winograd.cpp4
-rw-r--r--tests/validation/CL/YOLOLayer.cpp2
607 files changed, 2894 insertions, 1862 deletions
diff --git a/3rdparty b/3rdparty
-Subproject ff15a53098d2545808e2c840ad0d5bace6ae87a
+Subproject ba65985c4a47effae4620b95b158ecae8764d2e
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
deleted file mode 100644
index 231534fe50..0000000000
--- a/arm_compute/core/CL/CLKernels.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLKERNELS_H
-#define ARM_COMPUTE_CLKERNELS_H
-
-/* Header regrouping all the CL kernels */
-#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
-#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
-#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
-#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
-#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSelectKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
-#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
-
-#endif /* ARM_COMPUTE_CLKERNELS_H */
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
index c44e2c4f3f..0f6eb0dfa4 100644
--- a/arm_compute/core/CL/CLTypes.h
+++ b/arm_compute/core/CL/CLTypes.h
@@ -75,5 +75,40 @@ struct CLQuantization
const ICLFloatArray *scale; /**< Quantization scale array */
const ICLInt32Array *offset; /**< Quantization offset array */
};
+
+/** Internal keypoint structure for Lucas-Kanade Optical Flow */
+struct CLLKInternalKeypoint
+{
+ float x{ 0.f }; /**< x coordinate of the keypoint */
+ float y{ 0.f }; /**< y coordinate of the keypoint */
+ float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
+ float dummy{ 0.f }; /**< Padding field: brings the structure to four floats (128 bits) so that the GPU can use vload4 */
+};
+
+/** Structure for storing the Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
+struct CLCoefficientTable
+{
+ float A11; /**< iA11 * FLT_SCALE */
+ float A12; /**< iA12 * FLT_SCALE */
+ float A22; /**< iA22 * FLT_SCALE */
+ float min_eig; /**< Minimum eigenvalue */
+};
+
+/** Structure for storing ival, ixval and iyval for each point inside the window */
+struct CLOldValue
+{
+ int16_t ival; /**< ival extracted from the old image */
+ int16_t ixval; /**< ixval extracted from the Scharr Gx image */
+ int16_t iyval; /**< iyval extracted from the Scharr Gy image */
+ int16_t dummy; /**< Padding field: brings the structure to four int16_t values (64 bits) so that the GPU can use vload4 */
+};
+
+/** Interface for OpenCL Array of Internal Key Points. */
+using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
+/** Interface for OpenCL Array of Coefficient Tables. */
+using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
+/** Interface for OpenCL Array of Old Values. */
+using ICLOldValArray = ICLArray<CLOldValue>;
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_CL_TYPES_H */
diff --git a/arm_compute/runtime/CL/ICLOperator.h b/arm_compute/runtime/CL/ICLOperator.h
index 526b7e93e9..38bcaf32f2 100644
--- a/arm_compute/runtime/CL/ICLOperator.h
+++ b/arm_compute/runtime/CL/ICLOperator.h
@@ -24,7 +24,8 @@
#ifndef ARM_COMPUTE_ICLOPERATOR_H
#define ARM_COMPUTE_ICLOPERATOR_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
@@ -33,6 +34,7 @@
namespace arm_compute
{
+class ICLKernel;
namespace experimental
{
/** Basic interface for functions which have a single async CL kernel */
diff --git a/arm_compute/runtime/CL/ICLSimpleFunction.h b/arm_compute/runtime/CL/ICLSimpleFunction.h
index 4b1d5b1485..310bf770c4 100644
--- a/arm_compute/runtime/CL/ICLSimpleFunction.h
+++ b/arm_compute/runtime/CL/ICLSimpleFunction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_ICLSIMPLEFUNCTION_H
#define ARM_COMPUTE_ICLSIMPLEFUNCTION_H
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -34,6 +32,8 @@ namespace arm_compute
{
// Forward declarations
class CLRuntimeContext;
+class CLFillBorderKernel;
+class ICLKernel;
/** Basic interface for functions which have a single OpenCL kernel */
class ICLSimpleFunction : public IFunction
@@ -53,14 +53,16 @@ public:
ICLSimpleFunction &operator=(const ICLSimpleFunction &) = delete;
/** Default move assignment operator */
ICLSimpleFunction &operator=(ICLSimpleFunction &&) = default;
+ /** Default destructor */
+ ~ICLSimpleFunction();
// Inherited methods overridden:
void run() override final;
protected:
- std::unique_ptr<ICLKernel> _kernel; /**< Kernel to run */
- CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
- CLRuntimeContext *_ctx; /**< Context to use */
+ std::unique_ptr<ICLKernel> _kernel; /**< Kernel to run */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle borders */
+ CLRuntimeContext *_ctx; /**< Context to use */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLSIMPLEFUNCTION_H */
diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
index b0f1948beb..f2831e2a99 100644
--- a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
+++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLAbsoluteDifferenceKernel
diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h
index 9dbf13b873..20d3476d2e 100644
--- a/arm_compute/runtime/CL/functions/CLAccumulate.h
+++ b/arm_compute/runtime/CL/functions/CLAccumulate.h
@@ -30,6 +30,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLAccumulateKernel */
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 632487c78d..dc2cb62b71 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLActivationLayerKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index dc0c37e860..c254284cd7 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLARGMINMAXLAYER_H
#define ARM_COMPUTE_CLARGMINMAXLAYER_H
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
@@ -36,6 +35,7 @@ namespace arm_compute
{
class ITensorInfo;
class ICLTensor;
+class CLArgMinMaxLayerKernel;
/** Function to calculate the index of the minimum or maximum values in a
* tensor based on an axis.
@@ -53,6 +53,16 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLArgMinMaxLayer(const CLArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLArgMinMaxLayer &operator=(const CLArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLArgMinMaxLayer(CLArgMinMaxLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLArgMinMaxLayer &operator=(CLArgMinMaxLayer &&) = delete;
+ /** Default destructor */
+ ~CLArgMinMaxLayer();
/** Set the input and output tensors.
*
* @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
@@ -85,13 +95,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- std::vector<CLTensor> _results_vector;
- CLTensor _not_reshaped_output;
- std::vector<CLArgMinMaxLayerKernel> _reduction_kernels_vector;
- CLReshapeLayer _reshape;
- unsigned int _num_of_stages;
- unsigned int _reduction_axis;
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ CLTensor _not_reshaped_output;
+ std::vector<std::unique_ptr<CLArgMinMaxLayerKernel>> _reduction_kernels_vector;
+ CLReshapeLayer _reshape;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLARGMINMAXLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index c22991da7c..c8acf9fc6b 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -26,12 +26,16 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
+class CLBatchNormalizationLayerKernel;
/** Basic function to run @ref CLNormalizationLayerKernel and simulate a batch normalization layer.
*
@@ -44,6 +48,16 @@ class CLBatchNormalizationLayer : public IFunction
public:
/** Default constructor */
CLBatchNormalizationLayer();
+ /** Prevent instances of this class from being copied */
+ CLBatchNormalizationLayer(const CLBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLBatchNormalizationLayer &operator=(const CLBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchNormalizationLayer(CLBatchNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchNormalizationLayer &operator=(CLBatchNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~CLBatchNormalizationLayer();
/** Set the input and output tensors.
*
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
@@ -104,7 +118,7 @@ public:
void run() override;
private:
- CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */
+ std::unique_ptr<CLBatchNormalizationLayerKernel> _norm_kernel; /**< BatchNormalization layer kernel to run */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
index ba57921cc2..bdb58531d0 100644
--- a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
@@ -26,11 +26,15 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class ITensorInfo;
+class CLBatchToSpaceLayerKernel;
class ICLTensor;
/** Basic function to run @ref CLBatchToSpaceLayerKernel. */
@@ -39,6 +43,16 @@ class CLBatchToSpaceLayer : public IFunction
public:
/** Default constructor */
CLBatchToSpaceLayer();
+ /** Prevent instances of this class from being copied */
+ CLBatchToSpaceLayer(const CLBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLBatchToSpaceLayer &operator=(const CLBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchToSpaceLayer(CLBatchToSpaceLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchToSpaceLayer &operator=(CLBatchToSpaceLayer &&) = delete;
+ /** Default destructor */
+ ~CLBatchToSpaceLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -95,7 +109,7 @@ public:
void run() override;
private:
- CLBatchToSpaceLayerKernel _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */
+ std::unique_ptr<CLBatchToSpaceLayerKernel> _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
index 3c28938807..bf5993f4b0 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseAndKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
index 4c21d5647f..1d8531a176 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseNot.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseNotKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
index 8a481737e3..7876cbf196 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseOr.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseOrKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
index 6928e59d38..4f054062cd 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseXor.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseXorKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
index 5e4e89071b..d6409106da 100644
--- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
+++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
@@ -24,12 +24,16 @@
#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H
#define ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLBoundingBoxTransformKernel;
+class BoundingBoxTransformInfo;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLBoundingBoxTransformKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h
index 2d2aa4705c..cff780614c 100644
--- a/arm_compute/runtime/CL/functions/CLBox3x3.h
+++ b/arm_compute/runtime/CL/functions/CLBox3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h
index f9d9f8f66a..9e41c31728 100644
--- a/arm_compute/runtime/CL/functions/CLCannyEdge.h
+++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h
@@ -26,8 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -36,6 +34,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLGradientKernel;
+class CLEdgeNonMaxSuppressionKernel;
+class CLEdgeTraceKernel;
class ICLTensor;
/** Basic function to execute canny edge on OpenCL. This function calls the following OpenCL kernels and functions:
@@ -56,6 +59,8 @@ public:
CLCannyEdge(const CLCannyEdge &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLCannyEdge &operator=(const CLCannyEdge &) = delete;
+ /** Default destructor */
+ ~CLCannyEdge();
/** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
@@ -88,20 +93,20 @@ public:
virtual void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
- CLGradientKernel _gradient; /**< Gradient kernel. */
- CLFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
- CLEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel. */
- CLEdgeTraceKernel _edge_trace; /**< Edge tracing kernel. */
- CLImage _gx; /**< Source tensor - Gx component. */
- CLImage _gy; /**< Source tensor - Gy component. */
- CLImage _mag; /**< Source tensor - Magnitude. */
- CLImage _phase; /**< Source tensor - Phase. */
- CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */
- CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */
- ICLTensor *_output; /**< Output tensor provided by the user. */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
+ std::unique_ptr<CLGradientKernel> _gradient; /**< Gradient kernel. */
+ std::unique_ptr<CLFillBorderKernel> _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
+ std::unique_ptr<CLEdgeNonMaxSuppressionKernel> _non_max_suppr; /**< Non-Maxima suppression kernel. */
+ std::unique_ptr<CLEdgeTraceKernel> _edge_trace; /**< Edge tracing kernel. */
+ CLImage _gx; /**< Source tensor - Gx component. */
+ CLImage _gy; /**< Source tensor - Gy component. */
+ CLImage _mag; /**< Source tensor - Magnitude. */
+ CLImage _phase; /**< Source tensor - Phase. */
+ CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */
+ CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */
+ ICLTensor *_output; /**< Output tensor provided by the user. */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCANNYEDGE_H */
diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h
index 592368d135..bd333d4e72 100644
--- a/arm_compute/runtime/CL/functions/CLCast.h
+++ b/arm_compute/runtime/CL/functions/CLCast.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthConvertLayerKernel. */
class CLCast : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h
index 4e3d10cc10..5927662fc2 100644
--- a/arm_compute/runtime/CL/functions/CLChannelCombine.h
+++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h
index cf042b4519..9ce9bcdd8a 100644
--- a/arm_compute/runtime/CL/functions/CLChannelExtract.h
+++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
index e0bb3d01c9..54cf59f59a 100644
--- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H
#define ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLChannelShuffleLayerKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h
index e4017c2686..47bcabfb63 100644
--- a/arm_compute/runtime/CL/functions/CLColorConvert.h
+++ b/arm_compute/runtime/CL/functions/CLColorConvert.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h
index c6d61e45f2..8cc3e96ec5 100644
--- a/arm_compute/runtime/CL/functions/CLComparison.h
+++ b/arm_compute/runtime/CL/functions/CLComparison.h
@@ -30,7 +30,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLComparisonKernel */
class CLComparison : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
index a2f1a4eb66..d6a2ab423d 100644
--- a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
+++ b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
@@ -24,12 +24,15 @@
#ifndef ARM_COMPUTE_CLCOMPUTEALLANCHORS_H
#define ARM_COMPUTE_CLCOMPUTEALLANCHORS_H
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
+class ComputeAnchorsInfo;
/** Basic function to run @ref CLComputeAllAnchorsKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index f535c8ea97..5e7003a112 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -27,7 +27,6 @@
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
#include <memory>
@@ -36,7 +35,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ICLKernel;
class ITensorInfo;
class Status;
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index 9298be2e53..75a3d3213e 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -24,14 +24,17 @@
#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H
#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/ITransformWeights.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLConvertFullyConnectedWeightsKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLConvertFullyConnectedWeightsKernel. */
class CLConvertFullyConnectedWeights : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h
index c06ad0d969..4a1631a702 100644
--- a/arm_compute/runtime/CL/functions/CLConvolution.h
+++ b/arm_compute/runtime/CL/functions/CLConvolution.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLCONVOLUTION_H
#define ARM_COMPUTE_CLCONVOLUTION_H
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
@@ -38,6 +36,13 @@
namespace arm_compute
{
+template <unsigned int matrix_size>
+class CLConvolutionKernel;
+template <unsigned int matrix_size>
+class CLSeparableConvolutionHorKernel;
+template <unsigned int matrix_size>
+class CLSeparableConvolutionVertKernel;
+class CLFillBorderKernel;
class ICLTensor;
/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels:
@@ -85,6 +90,16 @@ class CLConvolutionSquare : public IFunction
public:
/** Default constructor */
CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionSquare(const CLConvolutionSquare &) = delete;
+ /** Default move constructor */
+ CLConvolutionSquare(CLConvolutionSquare &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionSquare &operator=(const CLConvolutionSquare &) = delete;
+ /** Default move assignment operator */
+ CLConvolutionSquare &operator=(CLConvolutionSquare &&) = default;
+ /** Default destructor */
+ ~CLConvolutionSquare();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -111,13 +126,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- CLSeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- CLSeparableConvolutionVertKernel<matrix_size> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- CLConvolutionKernel<matrix_size> _kernel; /**< kernel for non-separated convolution **/
- CLFillBorderKernel _border_handler; /**< kernel for border handling */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
+ bool _is_separable; /**< true if the convolution can be separated */
+ std::unique_ptr<CLSeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
+ std::unique_ptr<CLSeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */
+ std::unique_ptr<CLConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< kernel for border handling */
};
/** Basic function to run 5x5 convolution. */
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index ac36523682..d1de721193 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -73,6 +73,16 @@ class CLConvolutionLayer : public IFunction
public:
/** Default constructor */
CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLConvolutionLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionLayer(const CLConvolutionLayer &) = delete;
+ /** Default move constructor */
+ CLConvolutionLayer(CLConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionLayer &operator=(const CLConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ CLConvolutionLayer &operator=(CLConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index c20d75eea8..f1a091df84 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
class CLCopy : public ICLSimpleFunction
{
diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h
index e940928b90..e781cfe61f 100644
--- a/arm_compute/runtime/CL/functions/CLCropResize.h
+++ b/arm_compute/runtime/CL/functions/CLCropResize.h
@@ -25,9 +25,7 @@
#define ARM_COMPUTE_CL_CROP_RESIZE_H
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLScale.h"
@@ -37,7 +35,11 @@
namespace arm_compute
{
// Forward Declarations
+class CLCompileContext;
+class CLCopyKernel;
+class CLCropKernel;
class ITensor;
+class ITensorInfo;
/** Function to perform cropping and resizing */
class CLCropResize : public IFunction
@@ -54,7 +56,7 @@ public:
/** Allow instances of this class to be moved */
CLCropResize &operator=(CLCropResize &&) = default;
/** Default destructor */
- virtual ~CLCropResize() = default;
+ ~CLCropResize();
/** Configure kernel
*
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
index 19a44f7b93..3ebc858d32 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -24,17 +24,20 @@
#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H
#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
+class CLDeconvolutionLayerUpsampleKernel;
+class CLCompileContext;
+class CLMemsetKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions:
*
@@ -55,7 +58,7 @@ public:
/** Allow instances of this class to be moved */
CLDeconvolutionLayerUpsample &operator=(CLDeconvolutionLayerUpsample &&) = default;
/** Default destructor */
- virtual ~CLDeconvolutionLayerUpsample() = default;
+ ~CLDeconvolutionLayerUpsample();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
@@ -86,9 +89,9 @@ public:
void run() override;
private:
- CLDeconvolutionLayerUpsampleKernel _upsample;
- CLMemsetKernel _memset;
- ICLTensor *_output;
+ std::unique_ptr<CLDeconvolutionLayerUpsampleKernel> _upsample;
+ std::unique_ptr<CLMemsetKernel> _memset;
+ ICLTensor *_output;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index d125584c97..b0f297aec5 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthConvertLayerKernel. */
class CLDepthConvertLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
index 5e197cb9b8..a0aa288dbf 100644
--- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthToSpaceLayerKernel. */
class CLDepthToSpaceLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 558c4540fa..8e594bc09f 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -24,12 +24,6 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
@@ -38,6 +32,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLDepthwiseConvolutionLayerNativeKernel;
+class CLDepthwiseConvolutionLayerReshapeWeightsKernel;
+class ICLDepthwiseConvolutionLayer3x3Kernel;
class ICLTensor;
/** Function to execute a depthwise convolution
@@ -55,6 +54,8 @@ public:
CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
/** Default move assignment operator */
CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
@@ -211,25 +212,25 @@ private:
};
private:
- MemoryGroup _memory_group;
- std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
- CLFillBorderKernel _border_handler;
- CLPermute _permute_input_to_nchw;
- CLPermute _permute_weights_to_nchw;
- CLPermute _permute_output_to_nhwc;
- CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
- CLTensor _permuted_input;
- CLTensor _permuted_weights;
- CLTensor _permuted_output;
- CLTensor _output_multipliers;
- CLTensor _output_shifts;
- const ITensor *_original_weights;
- const ITensor *_input;
- const ITensor *_output;
- bool _needs_permute;
- bool _needs_weights_reshape;
- bool _is_prepared;
- bool _is_quantized;
+ MemoryGroup _memory_group;
+ std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ std::unique_ptr<CLDepthwiseConvolutionLayerReshapeWeightsKernel> _reshape_weights;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ CLTensor _output_multipliers;
+ CLTensor _output_shifts;
+ const ITensor *_original_weights;
+ const ITensor *_input;
+ const ITensor *_output;
+ bool _needs_permute;
+ bool _needs_weights_reshape;
+ bool _is_prepared;
+ bool _is_quantized;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
@@ -313,10 +314,10 @@ private:
private:
MemoryGroup _memory_group;
- CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
- CLPermute _permute_input_to_nhwc;
- CLPermute _permute_weights_to_nhwc;
- CLPermute _permute_output_to_nchw;
+ std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLPermute _permute_output_to_nchw;
CLTensor _permuted_input;
CLTensor _permuted_weights;
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 88ed915421..b2cf3356f4 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDequantizationLayerKernel that dequantizes an input tensor */
class CLDequantizationLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h
index 1aba6a9f6c..4a91d5d50b 100644
--- a/arm_compute/runtime/CL/functions/CLDerivative.h
+++ b/arm_compute/runtime/CL/functions/CLDerivative.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute first order derivative operator. This function calls the following CL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h
index adb9cf4e6c..bf72cd3b26 100644
--- a/arm_compute/runtime/CL/functions/CLDilate.h
+++ b/arm_compute/runtime/CL/functions/CLDilate.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute dilate. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 8107fa24f3..0afc9d3f38 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/IFunction.h"
@@ -34,7 +32,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLDirectConvolutionLayerKernel;
+class CLFillBorderKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute direct convolution function:
*/
@@ -43,6 +45,12 @@ class CLDirectConvolutionLayer : public IFunction
public:
/** Default constructor */
CLDirectConvolutionLayer();
+ /** Prevent instances of this class from being copied */
+ CLDirectConvolutionLayer(const CLDirectConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLDirectConvolutionLayer &operator=(const CLDirectConvolutionLayer &) = delete;
+ /** Default destructor */
+ ~CLDirectConvolutionLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -95,9 +103,9 @@ public:
void run() override;
private:
- CLDirectConvolutionLayerKernel _direct_conv_kernel;
- CLFillBorderKernel _input_border_handler;
- CLActivationLayer _activationlayer_function;
+ std::unique_ptr<CLDirectConvolutionLayerKernel> _direct_conv_kernel;
+ std::unique_ptr<CLFillBorderKernel> _input_border_handler;
+ CLActivationLayer _activationlayer_function;
bool _is_activationlayer_enabled;
};
diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
index 5208bfe404..72b5b7dee8 100644
--- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
+++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform inverse square root on an input tensor. */
class CLRsqrtLayer : public IFunction
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 31d4f2e745..55c5fb3455 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -30,6 +30,8 @@
namespace arm_compute
{
class ICLTensor;
+class CLCompileContext;
+class ITensorInfo;
namespace experimental
{
diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
index 883f330b33..17352d1a9b 100644
--- a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
+++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
@@ -24,16 +24,19 @@
#ifndef ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
#define ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
#include "arm_compute/runtime/CL/CLDistribution1D.h"
#include "arm_compute/runtime/CL/CLLut.h"
#include "arm_compute/runtime/IFunction.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLHistogramKernel;
+class CLHistogramBorderKernel;
+class CLTableLookupKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -48,6 +51,12 @@ class CLEqualizeHistogram : public IFunction
public:
/** Default Constructor. */
CLEqualizeHistogram();
+ /** Prevent instances of this class from being copied */
+ CLEqualizeHistogram(const CLEqualizeHistogram &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLEqualizeHistogram &operator=(const CLEqualizeHistogram &) = delete;
+ /** Default destructor */
+ ~CLEqualizeHistogram();
/** Initialise the kernel's inputs.
*
* @param[in] input Input image. Data types supported: U8.
@@ -66,14 +75,14 @@ public:
void run() override;
private:
- CLHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */
- CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
- CLTableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
- CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
- CLDistribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */
- CLLut _cd_lut; /**< Holds the equalization lookuptable. */
- static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
- static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
+ std::unique_ptr<CLHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */
+ std::unique_ptr<CLHistogramBorderKernel> _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
+ std::unique_ptr<CLTableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
+ CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
+ CLDistribution1D                        _cum_dist;                /**< Distribution that holds the cumulative distribution of the input histogram. */
+ CLLut _cd_lut; /**< Holds the equalization lookuptable. */
+ static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
+ static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
};
}
#endif /*ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H */
diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h
index f8f1c72bc0..9d799bc91e 100644
--- a/arm_compute/runtime/CL/functions/CLErode.h
+++ b/arm_compute/runtime/CL/functions/CLErode.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute erode. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index a6a35ab320..31a2cc6b06 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h
@@ -26,9 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -36,6 +33,9 @@
namespace arm_compute
{
// Forward declaration
+class CLFFTDigitReverseKernel;
+class CLFFTRadixStageKernel;
+class CLFFTScaleKernel;
class ICLTensor;
/** Basic function to execute one dimensional FFT. This function calls the following OpenCL kernels:
@@ -49,6 +49,12 @@ class CLFFT1D : public IFunction
public:
/** Default Constructor */
CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLFFT1D(const CLFFT1D &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLFFT1D &operator=(const CLFFT1D &) = delete;
+ /** Default destructor */
+ ~CLFFT1D();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: F32.
@@ -78,14 +84,14 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group;
- CLFFTDigitReverseKernel _digit_reverse_kernel;
- std::vector<CLFFTRadixStageKernel> _fft_kernels;
- CLFFTScaleKernel _scale_kernel;
- CLTensor _digit_reversed_input;
- CLTensor _digit_reverse_indices;
- unsigned int _num_ffts;
- bool _run_scale;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLFFTDigitReverseKernel> _digit_reverse_kernel;
+ std::vector<std::unique_ptr<CLFFTRadixStageKernel>> _fft_kernels;
+ std::unique_ptr<CLFFTScaleKernel> _scale_kernel;
+ CLTensor _digit_reversed_input;
+ CLTensor _digit_reverse_indices;
+ unsigned int _num_ffts;
+ bool _run_scale;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLFFT1D_H */
diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
index 9ceebeaa32..126944b323 100644
--- a/arm_compute/runtime/CL/functions/CLFFT2D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h
@@ -46,6 +46,12 @@ class CLFFT2D : public IFunction
public:
/** Default Constructor */
CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLFFT2D(const CLFFT2D &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLFFT2D &operator=(const CLFFT2D &) = delete;
+ /** Default destructor */
+ ~CLFFT2D();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: F32.
diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h
index 698cc67995..e110582c50 100644
--- a/arm_compute/runtime/CL/functions/CLFastCorners.h
+++ b/arm_compute/runtime/CL/functions/CLFastCorners.h
@@ -25,7 +25,6 @@
#define ARM_COMPUTE_CLFASTCORNERS_H
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/CL/CLArray.h"
@@ -40,6 +39,8 @@
namespace arm_compute
{
+class CLFastCornersKernel;
+class CLCopyToArrayKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -59,6 +60,8 @@ public:
CLFastCorners(const CLFastCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
const CLFastCorners &operator=(const CLFastCorners &) = delete;
+ /** Default destructor */
+ ~CLFastCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in] input Source image. Data types supported: U8.
@@ -88,18 +91,18 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLFastCornersKernel _fast_corners_kernel;
- CLNonMaximaSuppression3x3 _suppr_func;
- CLCopyToArrayKernel _copy_array_kernel;
- CLImage _output;
- CLImage _suppr;
- Window _win;
- bool _non_max;
- unsigned int *_num_corners;
- cl::Buffer _num_buffer;
- ICLKeyPointArray *_corners;
- uint8_t _constant_border_value;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLFastCornersKernel> _fast_corners_kernel;
+ CLNonMaximaSuppression3x3 _suppr_func;
+ std::unique_ptr<CLCopyToArrayKernel> _copy_array_kernel;
+ CLImage _output;
+ CLImage _suppr;
+ Window _win;
+ bool _non_max;
+ unsigned int *_num_corners;
+ cl::Buffer _num_buffer;
+ ICLKeyPointArray *_corners;
+ uint8_t _constant_border_value;
};
}
#endif /*ARM_COMPUTE_CLFASTCORNERS_H */
diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h
index b79b234158..fef8324432 100644
--- a/arm_compute/runtime/CL/functions/CLFill.h
+++ b/arm_compute/runtime/CL/functions/CLFill.h
@@ -30,6 +30,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Function to run @ref CLMemsetKernel to fill a tensor with a scalar value */
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index 18bc20e654..a4ad82dfd4 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -30,6 +30,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLFillBorderKernel */
diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
index b8139c2260..f5f4ff554f 100644
--- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute flatten. This function calls the following OpenCL kernel:
*
diff --git a/arm_compute/runtime/CL/functions/CLFloor.h b/arm_compute/runtime/CL/functions/CLFloor.h
index 93c3639f89..85d7071194 100644
--- a/arm_compute/runtime/CL/functions/CLFloor.h
+++ b/arm_compute/runtime/CL/functions/CLFloor.h
@@ -30,7 +30,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLFloorKernel */
class CLFloor : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 29788742d7..3f17e4a921 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -26,7 +26,6 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
index de6d5617c2..e35905fcf1 100644
--- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
+++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
@@ -24,14 +24,18 @@
#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLFuseBatchNormalizationKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to fuse the batch normalization node to a preceding convolution node */
class CLFuseBatchNormalization : public IFunction
@@ -48,7 +52,7 @@ public:
/** Allow instances of this class to be moved */
CLFuseBatchNormalization &operator=(CLFuseBatchNormalization &&) = default;
/** Default destructor */
- ~CLFuseBatchNormalization() = default;
+ ~CLFuseBatchNormalization();
/** Set the input and output tensors.
*
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
@@ -112,7 +116,7 @@ public:
void run() override;
private:
- CLFuseBatchNormalizationKernel _fuse_bn_kernel;
+ std::unique_ptr<CLFuseBatchNormalizationKernel> _fuse_bn_kernel;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H */
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 92f9736e35..0b13e7dbbf 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -24,11 +24,6 @@
#ifndef ARM_COMPUTE_CLGEMM_H
#define ARM_COMPUTE_CLGEMM_H
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTypes.h"
#include "arm_compute/runtime/IFunction.h"
@@ -36,9 +31,18 @@
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLGEMMReshapeRHSMatrixKernel;
+class CLGEMMMatrixMultiplyKernel;
+class CLGEMMMatrixMultiplyReshapedKernel;
+class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel;
+class CLGEMMReshapeLHSMatrixKernel;
class ICLTensor;
+class ITensorInfo;
namespace weights_transformations
{
@@ -46,41 +50,36 @@ namespace weights_transformations
class CLGEMMReshapeRHSMatrixKernelManaged : public ITransformWeights
{
public:
+ /** Default constructor */
+ CLGEMMReshapeRHSMatrixKernelManaged();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernelManaged(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+ /** Default move constructor */
+ CLGEMMReshapeRHSMatrixKernelManaged(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernelManaged &operator=(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+ /** Default move assignment operator */
+ CLGEMMReshapeRHSMatrixKernelManaged &operator=(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+ /** Default destructor */
+ ~CLGEMMReshapeRHSMatrixKernelManaged();
//Inherited method override
- void run() override
- {
- _output.allocator()->allocate();
- CLScheduler::get().enqueue(_kernel, false);
- _reshape_run = true;
- }
+ void run() override;
//Inherited method override
- void release() override
- {
- _output.allocator()->free();
- }
+ void release() override;
//Inherited method override
- ICLTensor *get_weights() override
- {
- return &_output;
- }
+ ICLTensor *get_weights() override;
//Inherited method override
- uint32_t uid() override
- {
- return _uid;
- }
+ uint32_t uid() override;
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
* @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
- void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
- {
- configure(CLKernelLibrary::get().get_compile_context(), input, info);
- }
+ void configure(const ICLTensor *input, GEMMRHSMatrixInfo info);
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
@@ -88,15 +87,12 @@ public:
* @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
- {
- _kernel.configure(compile_context, input, &_output, info);
- }
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info);
private:
- static constexpr uint32_t _uid = 0x15;
- CLTensor _output{};
- CLGEMMReshapeRHSMatrixKernel _kernel{};
+ static constexpr uint32_t _uid{ 0x15 };
+ CLTensor _output{};
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _kernel;
};
} // namespace weights_transformations
@@ -126,6 +122,8 @@ public:
CLGEMM &operator=(const CLGEMM &) = delete;
/** Default move assignment operator */
CLGEMM &operator=(CLGEMM &&) = default;
+ /** Default destructor */
+ ~CLGEMM();
/** Initialise the kernel's inputs and output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
@@ -198,24 +196,24 @@ private:
static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel;
- weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged _reshape_rhs_kernel_managed;
- CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_fallback_kernel;
- CLTensor _tmp_a;
- CLTensor _tmp_b;
- const ICLTensor *_original_b;
- const ICLTensor *_lhs;
- ICLTensor *_dst;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _has_pad_y;
- CLGEMMKernelType _gemm_kernel_type;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ std::unique_ptr<CLGEMMMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<CLGEMMReshapeLHSMatrixKernel> _reshape_lhs_kernel;
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _reshape_rhs_kernel;
+ std::unique_ptr<weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged> _reshape_rhs_kernel_managed;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedKernel> _mm_reshaped_kernel;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_fallback_kernel;
+ CLTensor _tmp_a;
+ CLTensor _tmp_b;
+ const ICLTensor *_original_b;
+ const ICLTensor *_lhs;
+ ICLTensor *_dst;
+ bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
+ bool _has_pad_y;
+ CLGEMMKernelType _gemm_kernel_type;
};
} // namespace arm_compute
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 467045cd86..340ac6e749 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -26,9 +26,7 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
@@ -43,6 +41,9 @@
namespace arm_compute
{
+class CLCol2ImKernel;
+class CLIm2ColKernel;
+class CLWeightsReshapeKernel;
class ICLTensor;
/** Function to reshape and transpose the weights. This function calls the following kernels:
@@ -53,6 +54,16 @@ class CLConvolutionLayerReshapeWeights : public IFunction
public:
/** Constructor */
CLConvolutionLayerReshapeWeights();
+ /** Prevent instances of this class from being copied */
+ CLConvolutionLayerReshapeWeights(const CLConvolutionLayerReshapeWeights &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLConvolutionLayerReshapeWeights &operator=(const CLConvolutionLayerReshapeWeights &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLConvolutionLayerReshapeWeights(CLConvolutionLayerReshapeWeights &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLConvolutionLayerReshapeWeights &operator=(CLConvolutionLayerReshapeWeights &&) = delete;
+ /** Default destructor */
+ ~CLConvolutionLayerReshapeWeights();
/** Set the input and output tensors.
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
@@ -87,7 +98,7 @@ public:
void run() override;
private:
- CLWeightsReshapeKernel _weights_reshape_kernel;
+ std::unique_ptr<CLWeightsReshapeKernel> _weights_reshape_kernel;
};
namespace weights_transformations
@@ -179,6 +190,8 @@ public:
CLGEMMConvolutionLayer &operator=(const CLGEMMConvolutionLayer &) = delete;
/** Default move assignment operator */
CLGEMMConvolutionLayer &operator=(CLGEMMConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLGEMMConvolutionLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -288,10 +301,10 @@ private:
IWeightsManager *_weights_manager;
CLConvolutionLayerReshapeWeights _reshape_weights;
weights_transformations::CLConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
- CLIm2ColKernel _im2col_kernel;
+ std::unique_ptr<CLIm2ColKernel> _im2col_kernel;
CLGEMM _mm_gemm;
CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- CLCol2ImKernel _col2im_kernel;
+ std::unique_ptr<CLCol2ImKernel> _col2im_kernel;
CLActivationLayer _activationlayer_function;
const ICLTensor *_original_weights;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
index 1fedeff444..32af0f9427 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
@@ -40,6 +39,7 @@
namespace arm_compute
{
+class CLDeconvolutionReshapeOutputKernel;
class ICLTensor;
/** Function to run the deconvolution layer through a call to GEMM.
*
@@ -89,6 +89,8 @@ public:
CLGEMMDeconvolutionLayer &operator=(const CLGEMMDeconvolutionLayer &) = delete;
/** Default move assignment operator */
CLGEMMDeconvolutionLayer &operator=(CLGEMMDeconvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLGEMMDeconvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
@@ -130,15 +132,15 @@ public:
private:
MemoryGroup _memory_group;
- CLGEMM _mm_gemm;
- CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- CLGEMMLowpOutputStage _gemmlowp_output_stage;
- CLPermute _permute_input_to_nhwc;
- CLPermute _permute_weights_to_nhwc;
- CLReshapeLayer _reshape_weights;
- CLTranspose _transpose_weights;
- CLDeconvolutionReshapeOutputKernel _deconv_reshape;
- CLSlice _slice_gemm;
+ CLGEMM _mm_gemm;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ CLGEMMLowpOutputStage _gemmlowp_output_stage;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLReshapeLayer _reshape_weights;
+ CLTranspose _transpose_weights;
+ std::unique_ptr<CLDeconvolutionReshapeOutputKernel> _deconv_reshape;
+ CLSlice _slice_gemm;
CLTensor _gemmlowp_final;
CLTensor _reshaped_weights;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 57b1e30df5..4cc8899690 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -24,21 +24,24 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
+class CLCompileContext;
class IMemoryManager;
class ICLTensor;
+class ITensorInfo;
+class CLDepthConvertLayerKernel;
+class CLGEMMLowpMatrixMultiplyNativeKernel;
+class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel;
+class CLGEMMLowpOffsetContributionKernel;
+class CLGEMMLowpOffsetContributionOutputStageKernel;
+class CLGEMMLowpMatrixAReductionKernel;
+class CLGEMMLowpMatrixBReductionKernel;
+class CLGEMMReshapeRHSMatrixKernel;
/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
class CLGEMMLowpMatrixMultiplyCore : public IFunction
@@ -54,6 +57,8 @@ public:
CLGEMMLowpMatrixMultiplyCore &operator=(const CLGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move assignment operator */
CLGEMMLowpMatrixMultiplyCore &operator=(CLGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Default destructor */
+ ~CLGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
@@ -112,14 +117,14 @@ private:
MemoryGroup _memory_group;
// Kernels used
- CLDepthConvertLayerKernel _weights_to_qasymm8;
- CLGEMMLowpMatrixMultiplyNativeKernel _mm_native_kernel;
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel;
- CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- CLGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
+ std::unique_ptr<CLDepthConvertLayerKernel> _weights_to_qasymm8;
+ std::unique_ptr<CLGEMMLowpMatrixMultiplyNativeKernel> _mm_native_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _mtx_b_reshape_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
+ std::unique_ptr<CLGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
+ std::unique_ptr<CLGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
// Temporary tensors
CLTensor _qasymm8_weights;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
index 44c52ffb79..a4edab9b8f 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
@@ -24,8 +24,11 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
#define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include <limits>
+
/** This file contains all available output stages for GEMMLowp on OpenCL.
*
* In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore),
@@ -36,7 +39,11 @@
namespace arm_compute
{
+class CLCompileContext;
class ITensor;
+class ICLTensor;
+class ITensorInfo;
+struct GEMMLowpOutputStageInfo;
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
*
diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h
index e87a120ba1..9c659be6fc 100644
--- a/arm_compute/runtime/CL/functions/CLGather.h
+++ b/arm_compute/runtime/CL/functions/CLGather.h
@@ -25,11 +25,14 @@
#ifndef ARM_COMPUTE_CLGATHER_H
#define ARM_COMPUTE_CLGATHER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLGatherKernel */
class CLGather : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
index 9fe3e9bb00..286a17618b 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian3x3.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
index fb369d750b..cf5b79eaac 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLGAUSSIAN5X5_H
#define ARM_COMPUTE_CLGAUSSIAN5X5_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLGaussian5x5HorKernel;
+class CLGaussian5x5VertKernel;
class ICLTensor;
/** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels:
@@ -54,6 +56,16 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLGaussian5x5(const CLGaussian5x5 &) = delete;
+ /** Default move constructor */
+ CLGaussian5x5(CLGaussian5x5 &&) = default;
+ /** Prevent instances of this class from being copied */
+ CLGaussian5x5 &operator=(const CLGaussian5x5 &) = delete;
+ /** Default move assignment operator */
+ CLGaussian5x5 &operator=(CLGaussian5x5 &&) = default;
+ /** Default destructor */
+ ~CLGaussian5x5();
/** Initialise the function's source, destinations and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -76,11 +88,11 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */
- CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp; /**< Temporary buffer */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLGaussian5x5HorKernel> _kernel_hor; /**< Horizontal pass kernel */
+ std::unique_ptr<CLGaussian5x5VertKernel> _kernel_vert; /**< Vertical pass kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp; /**< Temporary buffer */
};
}
#endif /*ARM_COMPUTE_CLGAUSSIAN5X5_H */
diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
index 70f324be11..b18e5f98f0 100644
--- a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
+++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
@@ -24,9 +24,6 @@
#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMID_H
#define ARM_COMPUTE_CLGAUSSIANPYRAMID_H
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLPyramid.h"
@@ -38,7 +35,12 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
class ICLTensor;
+class CLGaussianPyramidHorKernel;
+class CLGaussianPyramidVertKernel;
+class CLScaleKernel;
/** Common interface for all Gaussian pyramid functions */
class CLGaussianPyramid : public IFunction
@@ -55,7 +57,7 @@ public:
/** Allow instances of this class to be moved */
CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default;
/** Default destructor */
- virtual ~CLGaussianPyramid() = default;
+ ~CLGaussianPyramid();
/** Initialise the function's source, destinations and border mode.
*
* @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -93,6 +95,12 @@ class CLGaussianPyramidHalf : public CLGaussianPyramid
public:
/** Constructor */
CLGaussianPyramidHalf();
+ /** Prevent instances of this class from being copied */
+ CLGaussianPyramidHalf(const CLGaussianPyramidHalf &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLGaussianPyramidHalf &operator=(const CLGaussianPyramidHalf &) = delete;
+ /** Default destructor */
+ ~CLGaussianPyramidHalf();
// Inherited methods overridden:
void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
@@ -100,10 +108,10 @@ public:
void run() override;
private:
- std::vector<CLFillBorderKernel> _horizontal_border_handler;
- std::vector<CLFillBorderKernel> _vertical_border_handler;
- std::vector<CLGaussianPyramidHorKernel> _horizontal_reduction;
- std::vector<CLGaussianPyramidVertKernel> _vertical_reduction;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _horizontal_border_handler;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _vertical_border_handler;
+ std::vector<std::unique_ptr<CLGaussianPyramidHorKernel>> _horizontal_reduction;
+ std::vector<std::unique_ptr<CLGaussianPyramidVertKernel>> _vertical_reduction;
};
/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following OpenCL kernels and functions:
@@ -124,8 +132,8 @@ public:
void run() override;
private:
- std::vector<CLGaussian5x5> _gauss5x5;
- std::vector<CLScaleKernel> _scale_nearest;
+ std::vector<CLGaussian5x5> _gauss5x5;
+ std::vector<std::unique_ptr<CLScaleKernel>> _scale_nearest;
};
}
#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMID_H */
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index 6d5f2e5d71..0fb9a06c84 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -23,12 +23,7 @@
*/
#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
@@ -38,9 +33,19 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLBoundingBoxTransformKernel;
+class CLDequantizationLayerKernel;
+class CLComputeAllAnchorsKernel;
+class CLPadLayerKernel;
+class CLPermuteKernel;
+class CLQuantizationLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to generate proposals for a RPN (Region Proposal Network)
*
@@ -67,6 +72,8 @@ public:
CLGenerateProposalsLayer(const CLGenerateProposalsLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLGenerateProposalsLayer &operator=(const CLGenerateProposalsLayer &) = delete;
+ /** Default destructor */
+ ~CLGenerateProposalsLayer();
/** Set the input and output tensors.
*
@@ -130,16 +137,16 @@ private:
MemoryGroup _memory_group;
// OpenCL kernels
- CLPermuteKernel _permute_deltas_kernel;
- CLReshapeLayer _flatten_deltas;
- CLPermuteKernel _permute_scores_kernel;
- CLReshapeLayer _flatten_scores;
- CLComputeAllAnchorsKernel _compute_anchors_kernel;
- CLBoundingBoxTransformKernel _bounding_box_kernel;
- CLPadLayerKernel _pad_kernel;
- CLDequantizationLayerKernel _dequantize_anchors;
- CLDequantizationLayerKernel _dequantize_deltas;
- CLQuantizationLayerKernel _quantize_all_proposals;
+ std::unique_ptr<CLPermuteKernel> _permute_deltas_kernel;
+ CLReshapeLayer _flatten_deltas;
+ std::unique_ptr<CLPermuteKernel> _permute_scores_kernel;
+ CLReshapeLayer _flatten_scores;
+ std::unique_ptr<CLComputeAllAnchorsKernel> _compute_anchors_kernel;
+ std::unique_ptr<CLBoundingBoxTransformKernel> _bounding_box_kernel;
+ std::unique_ptr<CLPadLayerKernel> _pad_kernel;
+ std::unique_ptr<CLDequantizationLayerKernel> _dequantize_anchors;
+ std::unique_ptr<CLDequantizationLayerKernel> _dequantize_deltas;
+ std::unique_ptr<CLQuantizationLayerKernel> _quantize_all_proposals;
// CPP functions
CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
index dad7e6edf8..fa37b3c84e 100644
--- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
+++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLHOGDESCRIPTOR_H
#define ARM_COMPUTE_CLHOGDESCRIPTOR_H
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
@@ -37,6 +36,8 @@
namespace arm_compute
{
class IHOG;
+class CLHOGOrientationBinningKernel;
+class CLHOGBlockNormalizationKernel;
/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels:
*
* -# @ref CLHOGGradient
@@ -49,6 +50,12 @@ class CLHOGDescriptor : public IFunction
public:
/** Default constructor */
CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLHOGDescriptor(const CLHOGDescriptor &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLHOGDescriptor &operator=(const CLHOGDescriptor &) = delete;
+ /** Default destructor */
+ ~CLHOGDescriptor();
/** Initialise the function's source, destination, HOG data-object and border mode
*
* @param[in, out] input Input tensor. Data type supported: U8
@@ -75,13 +82,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient;
- CLHOGOrientationBinningKernel _orient_bin;
- CLHOGBlockNormalizationKernel _block_norm;
- CLTensor _mag;
- CLTensor _phase;
- CLTensor _hog_space;
+ MemoryGroup _memory_group;
+ CLHOGGradient _gradient;
+ std::unique_ptr<CLHOGOrientationBinningKernel> _orient_bin;
+ std::unique_ptr<CLHOGBlockNormalizationKernel> _block_norm;
+ CLTensor _mag;
+ CLTensor _phase;
+ CLTensor _hog_space;
};
}
diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
index 6697b5c24d..edc5b652d3 100644
--- a/arm_compute/runtime/CL/functions/CLHOGDetector.h
+++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h
@@ -24,13 +24,20 @@
#ifndef ARM_COMPUTE_CLHOGDETECTOR_H
#define ARM_COMPUTE_CLHOGDETECTOR_H
+#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/IHOG.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLHOGDetectorKernel;
+class ICLTensor;
+class ICLHOG;
+
/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
*
* -# @ref CLHOGDetectorKernel
@@ -50,7 +57,7 @@ public:
/** Allow instances of this class to be moved */
CLHOGDetector &operator=(CLHOGDetector &&) = default;
/** Default destructor */
- ~CLHOGDetector() = default;
+ ~CLHOGDetector();
/** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
*
* @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it.
@@ -78,16 +85,16 @@ public:
* @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
- float threshold = 0.0f,
+ float threshold = 0.0f,
size_t idx_class = 0);
// Inherited methods overridden:
void run() override;
private:
- CLHOGDetectorKernel _hog_detector_kernel;
- ICLDetectionWindowArray *_detection_windows;
- cl::Buffer _num_detection_windows;
+ std::unique_ptr<CLHOGDetectorKernel> _hog_detector_kernel;
+ ICLDetectionWindowArray *_detection_windows;
+ cl::Buffer _num_detection_windows;
};
}
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
index b0589027e7..39d26fb110 100644
--- a/arm_compute/runtime/CL/functions/CLHOGGradient.h
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h
@@ -24,9 +24,6 @@
#ifndef ARM_COMPUTE_CLHOGGRADIENT_H
#define ARM_COMPUTE_CLHOGGRADIENT_H
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
@@ -39,6 +36,9 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLMagnitudePhaseKernel;
+class ITensorInfo;
/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels:
*
* -# @ref CLDerivative
@@ -79,11 +79,11 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLDerivative _derivative;
- CLMagnitudePhaseKernel _mag_phase;
- CLTensor _gx;
- CLTensor _gy;
+ MemoryGroup _memory_group;
+ CLDerivative _derivative;
+ std::unique_ptr<CLMagnitudePhaseKernel> _mag_phase;
+ CLTensor _gx;
+ CLTensor _gy;
};
}
#endif /*ARM_COMPUTE_CLHOGGRADIENT_H */
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
index e7631c2c5a..2a2c9a0a5c 100644
--- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
@@ -26,7 +26,6 @@
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLMultiHOG.h"
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
@@ -39,6 +38,9 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLHOGOrientationBinningKernel;
+class CLHOGBlockNormalizationKernel;
/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels:
*
* -# @ref CLHOGGradient
@@ -62,6 +64,8 @@ public:
CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
+ /** Default destructor */
+ ~CLHOGMultiDetection();
/** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
*
* @param[in, out] input Input tensor. Data type supported: U8
@@ -110,21 +114,21 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient_kernel;
- std::vector<CLHOGOrientationBinningKernel> _orient_bin_kernel;
- std::vector<CLHOGBlockNormalizationKernel> _block_norm_kernel;
- std::vector<CLHOGDetector> _hog_detect_kernel;
- CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
- std::vector<CLTensor> _hog_space;
- std::vector<CLTensor> _hog_norm_space;
- ICLDetectionWindowArray *_detection_windows;
- CLTensor _mag;
- CLTensor _phase;
- bool _non_maxima_suppression;
- size_t _num_orient_bin_kernel;
- size_t _num_block_norm_kernel;
- size_t _num_hog_detect_kernel;
+ MemoryGroup _memory_group;
+ CLHOGGradient _gradient_kernel;
+ std::vector<std::unique_ptr<CLHOGOrientationBinningKernel>> _orient_bin_kernel;
+ std::vector<std::unique_ptr<CLHOGBlockNormalizationKernel>> _block_norm_kernel;
+ std::vector<CLHOGDetector> _hog_detect_kernel;
+ CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
+ std::vector<CLTensor> _hog_space;
+ std::vector<CLTensor> _hog_norm_space;
+ ICLDetectionWindowArray *_detection_windows;
+ CLTensor _mag;
+ CLTensor _phase;
+ bool _non_maxima_suppression;
+ size_t _num_orient_bin_kernel;
+ size_t _num_block_norm_kernel;
+ size_t _num_hog_detect_kernel;
};
}
diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
index 326a895d39..c9c67f5a28 100644
--- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h
+++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
@@ -24,16 +24,13 @@
#ifndef ARM_COMPUTE_CLHARRISCORNERS_H
#define ARM_COMPUTE_CLHARRISCORNERS_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include <cstdint>
@@ -41,6 +38,9 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLHarrisScoreKernel;
+class CLFillBorderKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -66,6 +66,8 @@ public:
CLHarrisCorners(const CLHarrisCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete;
+ /** Default destructor */
+ ~CLHarrisCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -104,21 +106,21 @@ public:
void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Sobel function */
- CLHarrisScoreKernel _harris_score; /**< Harris score kernel */
- CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
- CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
- CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
- CLFillBorderKernel _border_gx; /**< Border handler before running harris score */
- CLFillBorderKernel _border_gy; /**< Border handler before running harris score */
- CLImage _gx; /**< Source image - Gx component */
- CLImage _gy; /**< Source image - Gy component */
- CLImage _score; /**< Source image - Harris score */
- CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */
- std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
- int32_t _num_corner_candidates; /**< Number of potential corner candidates */
- ICLKeyPointArray *_corners; /**< Output corners array */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<IFunction> _sobel; /**< Sobel function */
+ std::unique_ptr<CLHarrisScoreKernel> _harris_score; /**< Harris score kernel */
+ CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
+ CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
+ CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_gx; /**< Border handler before running harris score */
+ std::unique_ptr<CLFillBorderKernel> _border_gy; /**< Border handler before running harris score */
+ CLImage _gx; /**< Source image - Gx component */
+ CLImage _gy; /**< Source image - Gy component */
+ CLImage _score; /**< Source image - Harris score */
+ CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */
+ std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
+ int32_t _num_corner_candidates; /**< Number of potential corner candidates */
+ ICLKeyPointArray *_corners; /**< Output corners array */
};
}
#endif /*ARM_COMPUTE_CLHARRISCORNERS_H */
diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h
index 7fdb8a9022..164bd0a28a 100644
--- a/arm_compute/runtime/CL/functions/CLHistogram.h
+++ b/arm_compute/runtime/CL/functions/CLHistogram.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLHISTOGRAM_H
#define ARM_COMPUTE_CLHISTOGRAM_H
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
#include "arm_compute/runtime/IFunction.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
namespace arm_compute
{
diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
index d7aa11cbc8..d41f3fedf6 100644
--- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H
#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a Instance normalization.
*
diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h
index 6b10ede650..0ecdbde8fe 100644
--- a/arm_compute/runtime/CL/functions/CLIntegralImage.h
+++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h
@@ -24,11 +24,15 @@
#ifndef ARM_COMPUTE_CLINTEGRALIMAGE_H
#define ARM_COMPUTE_CLINTEGRALIMAGE_H
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLIntegralImageHorKernel;
+class CLIntegralImageVertKernel;
class ICLTensor;
/** Basic function to execute integral image. This function calls the following OpenCL kernels:
@@ -42,6 +46,12 @@ class CLIntegralImage : public IFunction
public:
/** Default Constructor. */
CLIntegralImage();
+ /** Prevent instances of this class from being copied */
+ CLIntegralImage(const CLIntegralImage &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLIntegralImage &operator=(const CLIntegralImage &) = delete;
+ /** Default destructor */
+ ~CLIntegralImage();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: U8.
@@ -60,8 +70,8 @@ public:
void run() override;
protected:
- CLIntegralImageHorKernel _integral_hor; /**< Integral Image Horizontal kernel */
- CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */
+ std::unique_ptr<CLIntegralImageHorKernel> _integral_hor; /**< Integral Image Horizontal kernel */
+ std::unique_ptr<CLIntegralImageVertKernel> _integral_vert; /**< Integral Image Vertical kernel */
};
}
#endif /*ARM_COMPUTE_CLINTEGRALIMAGE_H */
diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
index bc79101d9d..401d249eb4 100644
--- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLL2NORMALIZELAYER_H
#define ARM_COMPUTE_CLL2NORMALIZELAYER_H
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
@@ -37,7 +36,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLL2NormalizeLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a L2 normalization on a given axis.
*
@@ -50,6 +52,16 @@ class CLL2NormalizeLayer : public IFunction
public:
/** Constructor */
CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLL2NormalizeLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayer(const CLL2NormalizeLayer &) = delete;
+ /** Default move constructor */
+ CLL2NormalizeLayer(CLL2NormalizeLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayer &operator=(const CLL2NormalizeLayer &) = delete;
+ /** Default move assignment operator */
+ CLL2NormalizeLayer &operator=(CLL2NormalizeLayer &&) = default;
/** Set the input and output tensors.
*
@@ -84,10 +96,10 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLReductionOperation _reduce_func;
- CLL2NormalizeLayerKernel _normalize_kernel;
- CLTensor _sumsq;
+ MemoryGroup _memory_group;
+ CLReductionOperation _reduce_func;
+ std::unique_ptr<CLL2NormalizeLayerKernel> _normalize_kernel;
+ CLTensor _sumsq;
};
}
#endif /*ARM_COMPUTE_CLL2NORMALIZELAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index 1a8b33463d..017f26aa1e 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -26,8 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
@@ -45,6 +43,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLCopyKernel;
+class CLMemsetKernel;
+class CLTransposeKernel;
class ICLTensor;
/** This function performs a single time step in a Long Short-Term Memory (LSTM) layer.
@@ -55,6 +57,16 @@ class CLLSTMLayer : public IFunction
public:
/** Default constructor */
CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLLSTMLayer(const CLLSTMLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLLSTMLayer &operator=(const CLLSTMLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLLSTMLayer(CLLSTMLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLLSTMLayer &operator=(CLLSTMLayer &&) = delete;
+ /** Default destructor */
+ ~CLLSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
@@ -200,90 +212,90 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLFullyConnectedLayer _fully_connected_input_gate;
- CLArithmeticAddition _accum_input_gate1;
- CLArithmeticSubtraction _subtract_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate;
- CLActivationLayer _activation_input_gate;
- CLFullyConnectedLayer _fully_connected_forget_gate;
- CLArithmeticAddition _accum_forget_gate1;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
- CLActivationLayer _activation_forget_gate;
- CLFullyConnectedLayer _fully_connected_cell_state;
- CLGEMM _gemm_cell_state1;
- CLTransposeKernel _transpose_cell_state;
- CLArithmeticAddition _accum_cell_state1;
- CLArithmeticAddition _accum_cell_state2;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
- CLActivationLayer _activation_cell_state;
- CLActivationLayer _cell_clip;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
- CLFullyConnectedLayer _fully_connected_output;
- CLPixelWiseMultiplication _pixelwise_mul_output_state1;
- CLArithmeticAddition _accum_output1;
- CLActivationLayer _activation_output;
- CLActivationLayer _activation_output_state;
- CLPixelWiseMultiplication _pixelwise_mul_output_state2;
- CLFullyConnectedLayer _fully_connected_output_state;
- CLActivationLayer _projection_clip;
- CLCopyKernel _copy_cell_state;
- CLCopyKernel _copy_output;
- CLConcatenateLayer _concat_scratch_buffer;
- CLConcatenateLayer _concat_inputs_forget_gate;
- CLConcatenateLayer _concat_weights_forget_gate;
- CLConcatenateLayer _concat_weights_input_gate;
- CLConcatenateLayer _concat_weights_output;
- CLMemsetKernel _ones_memset_kernel;
- CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
- CLArithmeticAddition _accum_input_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
- CLArithmeticAddition _accum_forget_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
- CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
- CLArithmeticAddition _accum_cell_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
- CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
- CLArithmeticAddition _accum_output_gate_bias;
- CLTensor _input_gate_out1;
- CLTensor _input_gate_out2;
- CLTensor _input_gate_out3;
- CLTensor _input_gate_out4;
- CLTensor _forget_gate_out1;
- CLTensor _forget_gate_out2;
- CLTensor _forget_gate_out3;
- CLTensor _forget_gate_out4;
- CLTensor _forget_gate_out5;
- CLTensor _forget_gate_out6;
- CLTensor _cell_state_out1;
- CLTensor _cell_state_out2;
- CLTensor _cell_state_out3;
- CLTensor _cell_state_out4;
- CLTensor _cell_state_out5;
- CLTensor _output1;
- CLTensor _output2;
- CLTensor _output3;
- CLTensor _output4;
- CLTensor _cell_state_activation;
- CLTensor _output_state1;
- CLTensor _ones;
- CLTensor _input_layer_norm_out1;
- CLTensor _input_layer_norm_out2;
- CLTensor _forget_layer_norm_out1;
- CLTensor _forget_layer_norm_out2;
- CLTensor _cell_layer_norm_out1;
- CLTensor _cell_layer_norm_out2;
- CLTensor _output_layer_norm_out1;
- CLTensor _output_layer_norm_out2;
- bool _run_peephole_opt;
- bool _run_cifg_opt;
- bool _perform_cell_clipping;
- bool _has_projection_weights;
- bool _perform_projection_clipping;
- bool _is_prepared;
- bool _is_layer_norm_lstm;
+ MemoryGroup _memory_group;
+ CLFullyConnectedLayer _fully_connected_input_gate;
+ CLArithmeticAddition _accum_input_gate1;
+ CLArithmeticSubtraction _subtract_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate;
+ CLActivationLayer _activation_input_gate;
+ CLFullyConnectedLayer _fully_connected_forget_gate;
+ CLArithmeticAddition _accum_forget_gate1;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
+ CLActivationLayer _activation_forget_gate;
+ CLFullyConnectedLayer _fully_connected_cell_state;
+ CLGEMM _gemm_cell_state1;
+ std::unique_ptr<CLTransposeKernel> _transpose_cell_state;
+ CLArithmeticAddition _accum_cell_state1;
+ CLArithmeticAddition _accum_cell_state2;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
+ CLActivationLayer _activation_cell_state;
+ CLActivationLayer _cell_clip;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
+ CLFullyConnectedLayer _fully_connected_output;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state1;
+ CLArithmeticAddition _accum_output1;
+ CLActivationLayer _activation_output;
+ CLActivationLayer _activation_output_state;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state2;
+ CLFullyConnectedLayer _fully_connected_output_state;
+ CLActivationLayer _projection_clip;
+ std::unique_ptr<CLCopyKernel> _copy_cell_state;
+ std::unique_ptr<CLCopyKernel> _copy_output;
+ CLConcatenateLayer _concat_scratch_buffer;
+ CLConcatenateLayer _concat_inputs_forget_gate;
+ CLConcatenateLayer _concat_weights_forget_gate;
+ CLConcatenateLayer _concat_weights_input_gate;
+ CLConcatenateLayer _concat_weights_output;
+ std::unique_ptr<CLMemsetKernel> _ones_memset_kernel;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
+ CLArithmeticAddition _accum_input_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
+ CLArithmeticAddition _accum_forget_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
+ CLArithmeticAddition _accum_cell_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
+ CLArithmeticAddition _accum_output_gate_bias;
+ CLTensor _input_gate_out1;
+ CLTensor _input_gate_out2;
+ CLTensor _input_gate_out3;
+ CLTensor _input_gate_out4;
+ CLTensor _forget_gate_out1;
+ CLTensor _forget_gate_out2;
+ CLTensor _forget_gate_out3;
+ CLTensor _forget_gate_out4;
+ CLTensor _forget_gate_out5;
+ CLTensor _forget_gate_out6;
+ CLTensor _cell_state_out1;
+ CLTensor _cell_state_out2;
+ CLTensor _cell_state_out3;
+ CLTensor _cell_state_out4;
+ CLTensor _cell_state_out5;
+ CLTensor _output1;
+ CLTensor _output2;
+ CLTensor _output3;
+ CLTensor _output4;
+ CLTensor _cell_state_activation;
+ CLTensor _output_state1;
+ CLTensor _ones;
+ CLTensor _input_layer_norm_out1;
+ CLTensor _input_layer_norm_out2;
+ CLTensor _forget_layer_norm_out1;
+ CLTensor _forget_layer_norm_out2;
+ CLTensor _cell_layer_norm_out1;
+ CLTensor _cell_layer_norm_out2;
+ CLTensor _output_layer_norm_out1;
+ CLTensor _output_layer_norm_out2;
+ bool _run_peephole_opt;
+ bool _run_cifg_opt;
+ bool _perform_cell_clipping;
+ bool _has_projection_weights;
+ bool _perform_projection_clipping;
+ bool _is_prepared;
+ bool _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLSTMLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
index ba85c6140c..3bbf9f2c30 100644
--- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -26,10 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -39,7 +35,13 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLCol2ImKernel;
+class CLIm2ColKernel;
+class CLWeightsReshapeKernel;
+class CLLocallyConnectedMatrixMultiplyKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels:
*
@@ -108,16 +110,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLIm2ColKernel _input_im2col_kernel;
- CLWeightsReshapeKernel _weights_reshape_kernel;
- CLLocallyConnectedMatrixMultiplyKernel _mm_kernel;
- CLCol2ImKernel _output_col2im_kernel;
- CLTensor _input_im2col_reshaped;
- CLTensor _weights_reshaped;
- CLTensor _gemm_output;
- bool _is_prepared;
- const ICLTensor *_original_weights;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLIm2ColKernel> _input_im2col_kernel;
+ std::unique_ptr<CLWeightsReshapeKernel> _weights_reshape_kernel;
+ std::unique_ptr<CLLocallyConnectedMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<CLCol2ImKernel> _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _gemm_output;
+ bool _is_prepared;
+ const ICLTensor *_original_weights;
};
}
#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h
index ad7cc778e5..6ac141641c 100644
--- a/arm_compute/runtime/CL/functions/CLMagnitude.h
+++ b/arm_compute/runtime/CL/functions/CLMagnitude.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLMagnitudePhaseKernel. */
diff --git a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
index 5c8548f9e0..693862fb89 100644
--- a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
@@ -24,14 +24,19 @@
#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H
#define ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include <memory>
namespace arm_compute
{
-class ITensor;
+class CLCompileContext;
+class ICLTensor;
+class ITensorInfo;
+class CLMaxUnpoolingLayerKernel;
+class CLMemsetKernel;
+struct PoolingLayerInfo;
/** Function to perform MaxUnpooling. This function calls the following OpenCL kernels:
*
@@ -43,6 +48,12 @@ class CLMaxUnpoolingLayer : public IFunction
public:
/** Constructor */
CLMaxUnpoolingLayer();
+ /** Prevent instances of this class from being copied */
+ CLMaxUnpoolingLayer(const CLMaxUnpoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLMaxUnpoolingLayer &operator=(const CLMaxUnpoolingLayer &) = delete;
+ /** Default destructor */
+ ~CLMaxUnpoolingLayer();
/** Set the input and output tensors.
*
* @note Output shape must be equal to the shape of the original input to pool.
@@ -88,8 +99,8 @@ public:
void run() override;
private:
- CLMemsetKernel _memset_kernel;
- CLMaxUnpoolingLayerKernel _unpooling_layer_kernel;
+ std::unique_ptr<CLMemsetKernel> _memset_kernel;
+ std::unique_ptr<CLMaxUnpoolingLayerKernel> _unpooling_layer_kernel;
};
}
#endif /* ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
index be192a7c11..d9ced1393e 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
@@ -25,15 +25,20 @@
#define ARM_COMPUTE_CLMEANSTDDEV_H
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class ICLTensor;
+class ITensorInfo;
+class CLFillBorderKernel;
+class CLMeanStdDevKernel;
/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */
class CLMeanStdDev : public IFunction
{
@@ -49,7 +54,7 @@ public:
/** Allow instances of this class to be moved */
CLMeanStdDev &operator=(CLMeanStdDev &&) = default;
/** Default destructor */
- ~CLMeanStdDev() = default;
+ ~CLMeanStdDev();
/** Initialise the kernel's inputs and outputs.
*
* @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
@@ -83,20 +88,20 @@ private:
void run_float();
void run_int();
- MemoryGroup _memory_group; /**< Function's memory group */
- DataType _data_type; /**< Input data type. */
- unsigned int _num_pixels; /**< Number of image's pixels. */
- bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
- CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
- CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
- CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
- CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
- float *_mean; /**< Pointer that holds the mean value. */
- float *_stddev; /**< Pointer that holds the standard deviation value. */
- CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
- CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
- cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ DataType _data_type; /**< Input data type. */
+ unsigned int _num_pixels; /**< Number of image's pixels. */
+ bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
+ CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
+ CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
+ CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
+ CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
+ float *_mean; /**< Pointer that holds the mean value. */
+ float *_stddev; /**< Pointer that holds the standard deviation value. */
+ std::unique_ptr<CLMeanStdDevKernel> _mean_stddev_kernel; /**< Kernel that performs the standard deviation calculation. */
+ std::unique_ptr<CLFillBorderKernel> _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
+ cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
+ cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
};
}
#endif /*ARM_COMPUTE_CLMEANSTDDEV_H */
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
index 1627de1ae8..cfe59eac09 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute mean and standard deviation normalization by calling @ref CLMeanStdDevNormalizationKernel */
class CLMeanStdDevNormalizationLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h
index 7f67f958c1..6c0458203e 100644
--- a/arm_compute/runtime/CL/functions/CLMedian3x3.h
+++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute median filter. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
index 04926f7bd0..4e3f28b006 100644
--- a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
+++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
@@ -24,12 +24,16 @@
#ifndef ARM_COMPUTE_CLMINMAXLOCATION_H
#define ARM_COMPUTE_CLMINMAXLOCATION_H
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLMinMaxKernel;
+class CLMinMaxLocationKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -51,6 +55,8 @@ public:
CLMinMaxLocation(CLMinMaxLocation &&) = default;
/** Allow instances of this class to be moved */
CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default;
+ /** Default destructor */
+ ~CLMinMaxLocation();
/** Initialise the kernel's inputs and outputs.
*
* @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
@@ -87,16 +93,16 @@ public:
void run() override;
private:
- CLMinMaxKernel _min_max_kernel; /**< Kernel that performs min/max */
- CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
- cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */
- cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- uint32_t *_min_count; /**< Minimum value occurrences. */
- uint32_t *_max_count; /**< Maximum value occurrences. */
- CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */
- CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */
+ std::unique_ptr<CLMinMaxKernel> _min_max_kernel; /**< Kernel that performs min/max */
+ std::unique_ptr<CLMinMaxLocationKernel> _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
+ cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */
+ cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max occurrence counts */
+ void *_min; /**< Minimum value. */
+ void *_max; /**< Maximum value. */
+ uint32_t *_min_count; /**< Minimum value occurrences. */
+ uint32_t *_max_count; /**< Maximum value occurrences. */
+ CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */
+ CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */
};
}
#endif /*ARM_COMPUTE_CLMINMAXLOCATION_H */
diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
index 8b7e350e09..1b466bf662 100644
--- a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
+++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute non linear filter. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
index 556de1c64c..c767a042ff 100644
--- a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
+++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
index a2d46b368f..389b21e5c8 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -24,18 +24,19 @@
#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYER_H
#define ARM_COMPUTE_CLNORMALIZATIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLNormalizationLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute a normalization layer. This function calls the following CL kernels:
*
@@ -48,6 +49,16 @@ class CLNormalizationLayer : public IFunction
public:
/** Default constructor */
CLNormalizationLayer();
+ /** Prevent instances of this class from being copied */
+ CLNormalizationLayer(const CLNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLNormalizationLayer &operator=(const CLNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLNormalizationLayer(CLNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLNormalizationLayer &operator=(CLNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~CLNormalizationLayer();
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
@@ -85,8 +96,8 @@ public:
void run() override;
private:
- CLNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */
- CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
+ std::unique_ptr<CLNormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel to run */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle borders */
};
}
#endif /* ARM_COMPUTE_CLNORMALIZATIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
index cf4a9b6497..de5155c65a 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLNormalizePlanarYUVLayerKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
index adce6748c8..0e34374aa5 100644
--- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h
+++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLOPTICALFLOW_H
#define ARM_COMPUTE_CLOPTICALFLOW_H
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
-
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLArray.h"
@@ -41,7 +39,12 @@
namespace arm_compute
{
+class CLCompileContext;
class CLPyramid;
+class CLLKTrackerInitKernel;
+class CLLKTrackerStage0Kernel;
+class CLLKTrackerStage1Kernel;
+class CLLKTrackerFinalizeKernel;
/** OpenCL Array of Internal Keypoints */
using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>;
@@ -71,6 +74,8 @@ public:
CLOpticalFlow(CLOpticalFlow &&) = default;
/** Allow instances of this class to be moved */
CLOpticalFlow &operator=(CLOpticalFlow &&) = default;
+ /** Default destructor */
+ ~CLOpticalFlow();
/** Initialise the function input and output
*
* @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8
@@ -117,22 +122,22 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- std::vector<CLLKTrackerInitKernel> _tracker_init_kernel;
- std::vector<CLLKTrackerStage0Kernel> _tracker_stage0_kernel;
- std::vector<CLLKTrackerStage1Kernel> _tracker_stage1_kernel;
- CLLKTrackerFinalizeKernel _tracker_finalize_kernel;
- std::vector<CLScharr3x3> _func_scharr;
- std::vector<CLTensor> _scharr_gx;
- std::vector<CLTensor> _scharr_gy;
- const ICLKeyPointArray *_old_points;
- const ICLKeyPointArray *_new_points_estimates;
- ICLKeyPointArray *_new_points;
- std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
- std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
- std::unique_ptr<CLCoefficientTableArray> _coefficient_table;
- std::unique_ptr<CLOldValueArray> _old_values;
- size_t _num_levels;
+ MemoryGroup _memory_group;
+ std::vector<std::unique_ptr<CLLKTrackerInitKernel>> _tracker_init_kernel;
+ std::vector<std::unique_ptr<CLLKTrackerStage0Kernel>> _tracker_stage0_kernel;
+ std::vector<std::unique_ptr<CLLKTrackerStage1Kernel>> _tracker_stage1_kernel;
+ std::unique_ptr<CLLKTrackerFinalizeKernel> _tracker_finalize_kernel;
+ std::vector<CLScharr3x3> _func_scharr;
+ std::vector<CLTensor> _scharr_gx;
+ std::vector<CLTensor> _scharr_gy;
+ const ICLKeyPointArray *_old_points;
+ const ICLKeyPointArray *_new_points_estimates;
+ ICLKeyPointArray *_new_points;
+ std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
+ std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
+ std::unique_ptr<CLCoefficientTableArray> _coefficient_table;
+ std::unique_ptr<CLOldValueArray> _old_values;
+ size_t _num_levels;
};
}
#endif /*ARM_COMPUTE_CLOPTICALFLOW_H */
diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index ffde9ec186..ab32bccc24 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h
@@ -24,13 +24,14 @@
#ifndef ARM_COMPUTE_CLPRELULAYER_H
#define ARM_COMPUTE_CLPRELULAYER_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
namespace experimental
{
diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h
index e3a923f81c..2bbde30fc2 100644
--- a/arm_compute/runtime/CL/functions/CLPadLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPadLayer.h
@@ -24,13 +24,15 @@
#ifndef ARM_COMPUTE_CLPADLAYER_H
#define ARM_COMPUTE_CLPADLAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLPadLayerKernel;
+class CLCopyKernel;
class ICLTensor;
/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels:
@@ -51,6 +53,8 @@ public:
CLPadLayer &operator=(const CLPadLayer &) = delete;
/** Default move assignment operator */
CLPadLayer &operator=(CLPadLayer &&) = default;
+ /** Default destructor */
+ ~CLPadLayer();
/** Initialize the function
*
@@ -95,9 +99,9 @@ public:
private:
void configure_reflect_mode(ICLTensor *input, ICLTensor *output);
- CLPadLayerKernel _pad_kernel;
- CLCopyKernel _copy_kernel;
- bool _perform_pad;
+ std::unique_ptr<CLPadLayerKernel> _pad_kernel;
+ std::unique_ptr<CLCopyKernel> _copy_kernel;
+ bool _perform_pad;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_PADLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h
index abc23eff0c..50e81da7c4 100644
--- a/arm_compute/runtime/CL/functions/CLPermute.h
+++ b/arm_compute/runtime/CL/functions/CLPermute.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute an @ref CLPermuteKernel. */
class CLPermute : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h
index 2731a08a52..34b8e72175 100644
--- a/arm_compute/runtime/CL/functions/CLPhase.h
+++ b/arm_compute/runtime/CL/functions/CLPhase.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute an @ref CLMagnitudePhaseKernel. */
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index 2066012306..6432cd040d 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -24,14 +24,16 @@
#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
// Forward declaration
+class CLCompileContext;
+class CLFillBorderKernel;
class ICLTensor;
+class ITensorInfo;
namespace experimental
{
@@ -106,7 +108,7 @@ public:
void run(ITensorPack &tensors) override;
private:
- CLFillBorderKernel _border_handler;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
};
/** Basic function to run @ref CLComplexPixelWiseMultiplicationKernel. */
@@ -139,7 +141,7 @@ public:
void run(ITensorPack &tensors) override;
private:
- CLFillBorderKernel _border_handler;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
};
} // namespace experimental
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index 96dacf9322..ef1f426c22 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
*
diff --git a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
index 9a78e77307..9129bfd064 100644
--- a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
@@ -24,13 +24,16 @@
#ifndef ARM_COMPUTE_CLPRIORBOXLAYER_H
#define ARM_COMPUTE_CLPRIORBOXLAYER_H
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
+#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLPriorBoxLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLPriorBoxLayerKernel. */
class CLPriorBoxLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
index 6e537680ee..a8f9221b3d 100644
--- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
@@ -24,9 +24,6 @@
#ifndef ARM_COMPUTE_CLQLSTMLAYER_H
#define ARM_COMPUTE_CLQLSTMLAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
@@ -40,7 +37,12 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLCopyKernel;
class ICLTensor;
+class CLGEMMLowpMatrixAReductionKernel;
+class CLQLSTMLayerNormalizationKernel;
+class ITensorInfo;
/** Basic function to run @ref CLQLSTMLayer
*
@@ -68,6 +70,8 @@ public:
CLQLSTMLayer &operator=(const CLQLSTMLayer &) = delete;
/** Default move assignment operator */
CLQLSTMLayer &operator=(CLQLSTMLayer &&) = default;
+ /** Default destructor */
+ ~CLQLSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
@@ -285,72 +289,72 @@ private:
};
// Functions used
- CLTranspose _transpose_input_to_forget_weights{};
- CLTranspose _transpose_input_to_cell_weights{};
- CLTranspose _transpose_input_to_output_weights{};
- CLTranspose _transpose_input_to_input_weights{};
- CLTranspose _transpose_recurrent_to_forget_weights{};
- CLTranspose _transpose_recurrent_to_cell_weights{};
- CLTranspose _transpose_recurrent_to_output_weights{};
- CLTranspose _transpose_recurrent_to_input_weights{};
- CLTranspose _transpose_projection_weights{};
- CLGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
- CLGEMMLowpMatrixAReductionKernel _projection_reduction{};
- CLArithmeticAddition _projection_bias_add{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
- CLGEMMLowpOutputStage _input_to_forget_outstage{};
- CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};
- CLGEMMLowpOutputStage _cell_to_forget_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_forget{};
- CLArithmeticAddition _accumulate_cell_forget{};
- CLActivationLayer _forget_gate_sigmoid{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
- CLGEMMLowpOutputStage _input_to_cell_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
- CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_modulation{};
- CLActivationLayer _cell_gate_tanh{};
- CLArithmeticSubtraction _input_gate_sub{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
- CLGEMMLowpOutputStage _input_to_input_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
- CLGEMMLowpOutputStage _recurrent_to_input_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_input{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
- CLGEMMLowpOutputStage _cell_to_input_outstage{};
- CLArithmeticAddition _accumulate_cell_input{};
- CLActivationLayer _input_gate_sigmoid{};
- CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};
- CLPixelWiseMultiplication _pixelwise_mul_input_cell{};
- CLArithmeticAddition _add_forget_cell{};
- CLActivationLayer _cell_clip{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
- CLGEMMLowpOutputStage _input_to_output_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
- CLGEMMLowpOutputStage _recurrent_to_output_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_output{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
- CLGEMMLowpOutputStage _cell_to_output_outstage{};
- CLArithmeticAddition _accumulate_cell_to_output{};
- CLActivationLayer _output_gate_sigmoid{};
- CLActivationLayer _hidden_tanh{};
- CLPixelWiseMultiplication _pixelwise_mul_hidden{};
- CLGEMMLowpOutputStage _hidden_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_projection{};
- CLGEMMLowpOutputStage _projection_outstage{};
- CLArithmeticAddition _accumulate_projection{};
- CLActivationLayer _projection_clip{};
- std::array<CLQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
- CLCopyKernel _copy_output{};
+ CLTranspose _transpose_input_to_forget_weights{};
+ CLTranspose _transpose_input_to_cell_weights{};
+ CLTranspose _transpose_input_to_output_weights{};
+ CLTranspose _transpose_input_to_input_weights{};
+ CLTranspose _transpose_recurrent_to_forget_weights{};
+ CLTranspose _transpose_recurrent_to_cell_weights{};
+ CLTranspose _transpose_recurrent_to_output_weights{};
+ CLTranspose _transpose_recurrent_to_input_weights{};
+ CLTranspose _transpose_projection_weights{};
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _projection_reduction;
+ CLArithmeticAddition _projection_bias_add{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
+ CLGEMMLowpOutputStage _input_to_forget_outstage{};
+ CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};
+ CLGEMMLowpOutputStage _cell_to_forget_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_forget{};
+ CLArithmeticAddition _accumulate_cell_forget{};
+ CLActivationLayer _forget_gate_sigmoid{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
+ CLGEMMLowpOutputStage _input_to_cell_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
+ CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_modulation{};
+ CLActivationLayer _cell_gate_tanh{};
+ CLArithmeticSubtraction _input_gate_sub{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
+ CLGEMMLowpOutputStage _input_to_input_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
+ CLGEMMLowpOutputStage _recurrent_to_input_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_input{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
+ CLGEMMLowpOutputStage _cell_to_input_outstage{};
+ CLArithmeticAddition _accumulate_cell_input{};
+ CLActivationLayer _input_gate_sigmoid{};
+ CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};
+ CLPixelWiseMultiplication _pixelwise_mul_input_cell{};
+ CLArithmeticAddition _add_forget_cell{};
+ CLActivationLayer _cell_clip{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
+ CLGEMMLowpOutputStage _input_to_output_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
+ CLGEMMLowpOutputStage _recurrent_to_output_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_output{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
+ CLGEMMLowpOutputStage _cell_to_output_outstage{};
+ CLArithmeticAddition _accumulate_cell_to_output{};
+ CLActivationLayer _output_gate_sigmoid{};
+ CLActivationLayer _hidden_tanh{};
+ CLPixelWiseMultiplication _pixelwise_mul_hidden{};
+ CLGEMMLowpOutputStage _hidden_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_projection{};
+ CLGEMMLowpOutputStage _projection_outstage{};
+ CLArithmeticAddition _accumulate_projection{};
+ CLActivationLayer _projection_clip{};
+ std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
+ std::unique_ptr<CLCopyKernel> _copy_output;
TensorCopyKernel _projection_bias_copy{};
TensorCopyKernel _projection_output_to_accumulate_copy{};
@@ -402,30 +406,11 @@ private:
inline CLQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
{
- return _layer_norms[getGateIndex(g)];
+ return *_layer_norms[getGateIndex(g)];
}
- inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in)
- {
- ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
-
- CLTensor *out = &get_layer_norm_output(g);
- _memory_group.manage(out);
- out->allocator()->init(*(in->info()));
-
- get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
- }
-
- inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
- {
- // Output quantization scale will be different, but ignored here
- // since it will be configured at configure() stage.
- const TensorInfo out
- {
- in
- };
- return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
- }
+ inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in);
+ inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
CLTensor _input_to_forget_weights_transposed{ nullptr };
diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index e045adf5fd..a0a27c5cb4 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYER_H
#define ARM_COMPUTE_CLQUANTIZATIONLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to simulate a quantization layer. This function calls the following CL kernels:
*
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h
index 9d1cb1a724..ff3fb5449b 100644
--- a/arm_compute/runtime/CL/functions/CLRNNLayer.h
+++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h
@@ -24,15 +24,17 @@
#ifndef ARM_COMPUTE_CLRNN_LAYER_H
#define ARM_COMPUTE_CLRNN_LAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCopyKernel;
class ICLTensor;
/** Basic function to run @ref CLRNNLayer */
@@ -41,6 +43,12 @@ class CLRNNLayer : public IFunction
public:
/** Default constructor */
CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLRNNLayer(const CLRNNLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLRNNLayer &operator=(const CLRNNLayer &) = delete;
+ /** Default destructor */
+ ~CLRNNLayer();
/** Initialize the function
*
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
@@ -85,16 +93,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLGEMM _gemm_state_f;
- CLArithmeticAddition _add_kernel;
- CLActivationLayer _activation;
- CLFullyConnectedLayer _fully_connected_kernel;
- CLCopyKernel _copy_kernel;
- CLTensor _fully_connected_out;
- CLTensor _gemm_output;
- CLTensor _add_output;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ CLGEMM _gemm_state_f;
+ CLArithmeticAddition _add_kernel;
+ CLActivationLayer _activation;
+ CLFullyConnectedLayer _fully_connected_kernel;
+ std::unique_ptr<CLCopyKernel> _copy_kernel;
+ CLTensor _fully_connected_out;
+ CLTensor _gemm_output;
+ CLTensor _add_output;
+ bool _is_prepared;
};
}
#endif /* ARM_COMPUTE_CLRNN_LAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
index 2e78f16d6b..b4cd5560ef 100644
--- a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
@@ -25,12 +25,14 @@
#define ARM_COMPUTE_CLROIALIGNLAYER_H
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ROIPoolingLayerInfo;
+class ITensorInfo;
/** Basic function to run @ref CLROIAlignLayerKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
index 30139274be..836575ef68 100644
--- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
@@ -24,14 +24,14 @@
#ifndef ARM_COMPUTE_CLROIPOOLINGLAYER_H
#define ARM_COMPUTE_CLROIPOOLINGLAYER_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ROIPoolingLayerInfo;
/** Basic function to run @ref CLROIPoolingLayerKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLRange.h b/arm_compute/runtime/CL/functions/CLRange.h
index a86cfb605d..e11e740861 100644
--- a/arm_compute/runtime/CL/functions/CLRange.h
+++ b/arm_compute/runtime/CL/functions/CLRange.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLRangeKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 5d050d71d6..3fbcee6c21 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLREDUCTIONOPERATION_H
#define ARM_COMPUTE_CLREDUCTIONOPERATION_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLReductionOperationKernel;
class ICLTensor;
/** Perform reduction operation.
@@ -49,6 +50,16 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLReductionOperation();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperation(const CLReductionOperation &) = delete;
+ /** Default move constructor */
+ CLReductionOperation(CLReductionOperation &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperation &operator=(const CLReductionOperation &) = delete;
+ /** Default move assignment operator */
+ CLReductionOperation &operator=(CLReductionOperation &&) = default;
/** Set the input and output tensors.
*
@@ -88,15 +99,15 @@ public:
private:
ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output);
- MemoryGroup _memory_group;
- std::vector<CLTensor> _results_vector;
- std::vector<CLReductionOperationKernel> _reduction_kernels_vector;
- std::vector<CLFillBorderKernel> _border_handlers_vector;
- CLReshapeLayer _reshape;
- unsigned int _num_of_stages;
- unsigned int _reduction_axis;
- bool _is_serial;
- bool _is_reshape_required;
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ std::vector<std::unique_ptr<CLReductionOperationKernel>> _reduction_kernels_vector;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _border_handlers_vector;
+ CLReshapeLayer _reshape;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
+ bool _is_serial;
+ bool _is_reshape_required;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLREDUCTIONOPERATION_H */
\ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h
index 5b110d58f4..bf5d348b3b 100644
--- a/arm_compute/runtime/CL/functions/CLRemap.h
+++ b/arm_compute/runtime/CL/functions/CLRemap.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute remap. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLReorgLayer.h b/arm_compute/runtime/CL/functions/CLReorgLayer.h
index a7287ce266..0840fd13fd 100644
--- a/arm_compute/runtime/CL/functions/CLReorgLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReorgLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
class CLReorgLayer : public ICLSimpleFunction
{
diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
index 7fc6c3b864..b4d52ec8cf 100644
--- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLReshapeLayerKernel */
class CLReshapeLayer : public IFunction
diff --git a/arm_compute/runtime/CL/functions/CLReverse.h b/arm_compute/runtime/CL/functions/CLReverse.h
index 6b140920e9..81fa04b1f5 100644
--- a/arm_compute/runtime/CL/functions/CLReverse.h
+++ b/arm_compute/runtime/CL/functions/CLReverse.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLREVERSE_H
#define ARM_COMPUTE_CLREVERSE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLReverseKernel */
class CLReverse : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
index d776e83035..360d63ea22 100644
--- a/arm_compute/runtime/CL/functions/CLScale.h
+++ b/arm_compute/runtime/CL/functions/CLScale.h
@@ -32,7 +32,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLScaleKernel */
class CLScale : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h
index 3892874f35..19c860f39b 100644
--- a/arm_compute/runtime/CL/functions/CLScharr3x3.h
+++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute scharr 3x3 filter. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLSelect.h b/arm_compute/runtime/CL/functions/CLSelect.h
index a1af922303..7fd52312fb 100644
--- a/arm_compute/runtime/CL/functions/CLSelect.h
+++ b/arm_compute/runtime/CL/functions/CLSelect.h
@@ -24,14 +24,15 @@
#ifndef ARM_COMPUTE_CLSELECT_H
#define ARM_COMPUTE_CLSELECT_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLSelect */
class CLSelect : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h
index 23c398cb41..f17e77236d 100644
--- a/arm_compute/runtime/CL/functions/CLSlice.h
+++ b/arm_compute/runtime/CL/functions/CLSlice.h
@@ -31,6 +31,8 @@ namespace arm_compute
{
// Forward Declarations
class ICLTensor;
+class CLCompileContext;
+class ITensorInfo;
namespace experimental
{
diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h
index 25d4ed6895..492900da11 100644
--- a/arm_compute/runtime/CL/functions/CLSobel3x3.h
+++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute sobel 3x3 filter. This function calls the following OpenCL kernels:
@@ -42,6 +43,14 @@ class ICLTensor;
class CLSobel3x3 : public ICLSimpleFunction
{
public:
+ /** Default Constructor */
+ CLSobel3x3() = default;
+ /** Prevent instances of this class from being copied */
+ CLSobel3x3(const CLSobel3x3 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel3x3 &operator=(const CLSobel3x3 &) = delete;
+ /** Default destructor */
+ ~CLSobel3x3();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
index 1f91c46f7f..a00fdd72b8 100644
--- a/arm_compute/runtime/CL/functions/CLSobel5x5.h
+++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLSOBEL5X5_H
#define ARM_COMPUTE_CLSOBEL5X5_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLSobel5x5HorKernel;
+class CLSobel5x5VertKernel;
class ICLTensor;
/** Basic function to execute sobel 5x5 filter. This function calls the following OpenCL kernels:
@@ -54,6 +56,12 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSobel5x5(const CLSobel5x5 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel5x5 &operator=(const CLSobel5x5 &) = delete;
+ /** Default destructor */
+ ~CLSobel5x5();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
@@ -82,12 +90,12 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
- CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLSobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
+ std::unique_ptr<CLSobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp_x; /**< Temporary buffer for Sobel X */
+ CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
};
}
#endif /*ARM_COMPUTE_CLSOBEL5X5_H */
diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
index 91daf64c29..01a863b11b 100644
--- a/arm_compute/runtime/CL/functions/CLSobel7x7.h
+++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLSOBEL7X7_H
#define ARM_COMPUTE_CLSOBEL7X7_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLSobel7x7HorKernel;
+class CLSobel7x7VertKernel;
class ICLTensor;
/** Basic function to execute sobel 7x7 filter. This function calls the following OpenCL kernels:
@@ -54,6 +56,12 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSobel7x7(const CLSobel7x7 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel7x7 &operator=(const CLSobel7x7 &) = delete;
+ /** Default destructor */
+ ~CLSobel7x7();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
@@ -82,12 +90,12 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
- CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLSobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
+ std::unique_ptr<CLSobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp_x; /**< Temporary buffer for Sobel X */
+ CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
};
}
#endif /*ARM_COMPUTE_CLSOBEL7X7_H */
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index fd71f3ed4d..ab10a64de4 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLSOFTMAXLAYER_H
#define ARM_COMPUTE_CLSOFTMAXLAYER_H
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
@@ -35,7 +34,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLLogits1DMaxShiftExpSumKernel;
+class CLLogits1DNormKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute a SoftmaxLayer.
*
@@ -57,6 +60,16 @@ class CLSoftmaxLayerGeneric : public IFunction
public:
/** Constructor */
CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSoftmaxLayerGeneric(const CLSoftmaxLayerGeneric &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSoftmaxLayerGeneric &operator=(const CLSoftmaxLayerGeneric &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSoftmaxLayerGeneric(CLSoftmaxLayerGeneric &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSoftmaxLayerGeneric &operator=(CLSoftmaxLayerGeneric &&) = delete;
+ /** Default destructor */
+ ~CLSoftmaxLayerGeneric();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
@@ -92,17 +105,17 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLPermute _permute_input;
- CLPermute _permute_output;
- CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel;
- CLLogits1DNormKernel _norm_kernel;
- CLTensor _max;
- CLTensor _sum;
- CLTensor _tmp;
- CLTensor _input_permuted;
- CLTensor _output_permuted;
- bool _needs_permute;
+ MemoryGroup _memory_group;
+ CLPermute _permute_input;
+ CLPermute _permute_output;
+ std::unique_ptr<CLLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
+ std::unique_ptr<CLLogits1DNormKernel> _norm_kernel;
+ CLTensor _max;
+ CLTensor _sum;
+ CLTensor _tmp;
+ CLTensor _input_permuted;
+ CLTensor _output_permuted;
+ bool _needs_permute;
};
using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>;
diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
index c6f7f11079..1611aa8ed4 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
@@ -24,16 +24,19 @@
#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYER_H
#define ARM_COMPUTE_CLSPACETOBATCHLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLMemsetKernel;
+class CLSpaceToBatchLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to spatial divide a tensor. This function calls the following OpenCL kernels/functions:
*
@@ -54,7 +57,7 @@ public:
/** Allow instances of this class to be moved */
CLSpaceToBatchLayer &operator=(CLSpaceToBatchLayer &&) = default;
/** Default destructor */
- virtual ~CLSpaceToBatchLayer() = default;
+ ~CLSpaceToBatchLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -121,9 +124,9 @@ public:
void run() override;
private:
- CLSpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- CLMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
+ std::unique_ptr<CLSpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
+ std::unique_ptr<CLMemsetKernel> _memset_kernel; /**< Memset kernel to run */
+ bool _has_padding; /**< Flag to check if the output has padding */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
index 24830cf4d3..9e476fe7bd 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
@@ -24,14 +24,17 @@
#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYER_H
#define ARM_COMPUTE_CLSPACETODEPTHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLSpaceToDepthLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLSpaceToDepthLayerKernel. */
class CLSpaceToDepthLayer : public IFunction
@@ -39,6 +42,16 @@ class CLSpaceToDepthLayer : public IFunction
public:
/** Default constructor */
CLSpaceToDepthLayer();
+ /** Prevent instances of this class from being copied */
+ CLSpaceToDepthLayer(const CLSpaceToDepthLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSpaceToDepthLayer &operator=(const CLSpaceToDepthLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSpaceToDepthLayer(CLSpaceToDepthLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSpaceToDepthLayer &operator=(CLSpaceToDepthLayer &&) = delete;
+ /** Default destructor */
+ ~CLSpaceToDepthLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -68,7 +81,7 @@ public:
void run() override;
private:
- CLSpaceToDepthLayerKernel _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */
+ std::unique_ptr<CLSpaceToDepthLayerKernel> _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLStackLayer.h b/arm_compute/runtime/CL/functions/CLStackLayer.h
index 95875962c8..3861fd299a 100644
--- a/arm_compute/runtime/CL/functions/CLStackLayer.h
+++ b/arm_compute/runtime/CL/functions/CLStackLayer.h
@@ -27,14 +27,15 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
-
#include <memory>
#include <vector>
namespace arm_compute
{
+class CLCompileContext;
+class CLStackLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to stack tensors along an axis. This function calls the following kernel:
*
@@ -46,6 +47,16 @@ class CLStackLayer : public IFunction
public:
/** Default constructor */
CLStackLayer();
+ /** Prevent instances of this class from being copied */
+ CLStackLayer(const CLStackLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLStackLayer &operator=(const CLStackLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLStackLayer(CLStackLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLStackLayer &operator=(CLStackLayer &&) = delete;
+ /** Default destructor */
+ ~CLStackLayer();
/** Initialise the kernel's inputs vector and output.
*
* @note Supported input tensor rank: up to 4
@@ -84,9 +95,9 @@ public:
void run() override;
private:
- std::vector<ICLTensor *> _input;
- std::vector<CLStackLayerKernel> _stack_kernels;
- unsigned int _num_inputs;
+ std::vector<ICLTensor *> _input;
+ std::vector<std::unique_ptr<CLStackLayerKernel>> _stack_kernels;
+ unsigned int _num_inputs;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSTACKLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h
index 32d4b7bdf9..ca59309548 100644
--- a/arm_compute/runtime/CL/functions/CLTableLookup.h
+++ b/arm_compute/runtime/CL/functions/CLTableLookup.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
class ICLLut;
diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h
index f3af122f0a..2c9213bd01 100644
--- a/arm_compute/runtime/CL/functions/CLThreshold.h
+++ b/arm_compute/runtime/CL/functions/CLThreshold.h
@@ -33,6 +33,7 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLThresholdKernel */
diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h
index d2f1e9730c..69743693ff 100644
--- a/arm_compute/runtime/CL/functions/CLTile.h
+++ b/arm_compute/runtime/CL/functions/CLTile.h
@@ -24,13 +24,14 @@
#ifndef ARM_COMPUTE_CLTILE_H
#define ARM_COMPUTE_CLTILE_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLTileKernel */
class CLTile : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
index 9ba7cafce4..2b7a03f23f 100644
--- a/arm_compute/runtime/CL/functions/CLTranspose.h
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLTRANSPOSE_H
#define ARM_COMPUTE_CLTRANSPOSE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel:
*
diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
index 07b4c8aecb..88b293069d 100644
--- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
@@ -26,13 +26,17 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLUpsampleLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLUpsampleLayerKernel */
class CLUpsampleLayer : public IFunction
@@ -49,7 +53,7 @@ public:
/** Allow instances of this class to be moved */
CLUpsampleLayer &operator=(CLUpsampleLayer &&) = default;
/** Default destructor */
- virtual ~CLUpsampleLayer() = default;
+ ~CLUpsampleLayer();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
@@ -86,8 +90,8 @@ public:
void run() override;
private:
- CLUpsampleLayerKernel _upsample;
- ICLTensor *_output;
+ std::unique_ptr<CLUpsampleLayerKernel> _upsample;
+ ICLTensor *_output;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLUPSAMPLELAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h
index eb7c05be84..153e9bfdfc 100644
--- a/arm_compute/runtime/CL/functions/CLWarpAffine.h
+++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation */
diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
index 2a1f78093e..5c8b5425a4 100644
--- a/arm_compute/runtime/CL/functions/CLWarpPerspective.h
+++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation */
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index 602f644230..9ced69c1bb 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
@@ -33,7 +31,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLWinogradFilterTransformKernel;
+class CLWinogradOutputTransformKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
*
@@ -56,6 +58,8 @@ public:
CLWinogradConvolutionLayer &operator=(const CLWinogradConvolutionLayer &) = delete;
/** Default move assignment operator */
CLWinogradConvolutionLayer &operator=(CLWinogradConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLWinogradConvolutionLayer();
/** Set the input and output tensors.
*
* @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
@@ -122,16 +126,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLGEMM _batched_mm;
- CLWinogradInputTransform _input_transform;
- CLWinogradFilterTransformKernel _filter_transform;
- CLWinogradOutputTransformKernel _output_transform;
- CLTensor _input0;
- CLTensor _input1;
- CLTensor _batched_mm_output;
- const ICLTensor *_original_weights;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ CLGEMM _batched_mm;
+ CLWinogradInputTransform _input_transform;
+ std::unique_ptr<CLWinogradFilterTransformKernel> _filter_transform;
+ std::unique_ptr<CLWinogradOutputTransformKernel> _output_transform;
+ CLTensor _input0;
+ CLTensor _input1;
+ CLTensor _batched_mm_output;
+ const ICLTensor *_original_weights;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
index 351f88012f..8cd809cc1f 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute a @ref CLWinogradInputTransformKernel. */
class CLWinogradInputTransform : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLYOLOLayer.h b/arm_compute/runtime/CL/functions/CLYOLOLayer.h
index 3e403f44bd..48ee4ea4f7 100644
--- a/arm_compute/runtime/CL/functions/CLYOLOLayer.h
+++ b/arm_compute/runtime/CL/functions/CLYOLOLayer.h
@@ -24,13 +24,14 @@
#ifndef ARM_COMPUTE_CLYOLOLAYER_H
#define ARM_COMPUTE_CLYOLOLAYER_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLYOLOLayerKernel that performs a partial activation on the input
*
diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h
index 0097383115..fd285160e9 100644
--- a/arm_compute/runtime/IOperator.h
+++ b/arm_compute/runtime/IOperator.h
@@ -24,14 +24,13 @@
#ifndef ARM_COMPUTE_IOPERATOR_H
#define ARM_COMPUTE_IOPERATOR_H
-#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
namespace arm_compute
{
+class ITensorPack;
namespace experimental
{
/** Base class for all async functions */
diff --git a/docs/04_adding_operator.dox b/docs/04_adding_operator.dox
index cf2e78368d..13be712549 100644
--- a/docs/04_adding_operator.dox
+++ b/docs/04_adding_operator.dox
@@ -80,7 +80,7 @@ There are specific interfaces for OpenCL and Neon: @ref ICLKernel, INEKernel (us
There are two other implementations of @ref IKernel called @ref ICLSimpleKernel and INESimpleKernel; they are the interfaces for simple kernels that have just one input tensor and one output tensor.
Creating a new kernel implies adding new files:
-- arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
+- src/core/CL/kernels/CLReshapeLayerKernel.h
- src/core/CL/cl_kernels/reshape_layer.cl
- src/core/CL/kernels/CLReshapeLayerKernel.cpp
- src/core/CL/CLKernelLibrary.cpp
@@ -90,16 +90,16 @@ Neon kernel
- src/core/NEON/kernels/NEReshapeLayerKernel.cpp
We must register the new layer in the respective libraries:
-- arm_compute/core/CL/CLKernels.h
+- src/core/CL/CLKernels.h
- arm_compute/core/NEON/NEKernels.h
These files contain the list of all kernels available in the corresponding Compute Library's backend, for example CLKernels:
@code{.cpp}
...
-#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
...
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
...
@endcode
diff --git a/docs/ComputeLibrary.dir b/docs/ComputeLibrary.dir
index 93be52afd7..7733e531cd 100644
--- a/docs/ComputeLibrary.dir
+++ b/docs/ComputeLibrary.dir
@@ -23,18 +23,10 @@
* @brief Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
*/
-/** @file arm_compute/core/CL/CLKernels.h
- * @brief Includes all the OpenCL kernels at once
- */
-
/** @file arm_compute/core/CL/OpenCL.h
* @brief Wrapper to configure the Khronos OpenCL C++ header
*/
-/** @dir arm_compute/core/CL/kernels
- * @brief Folder containing all the OpenCL kernels
- */
-
/** @dir arm_compute/core/CPP
* @brief CPP backend core: kernels and utilities.
*/
@@ -283,7 +275,7 @@
* @brief Folder containing all the configuration files for GEMM
*/
-/** @dir src/core/CL/cl_kernels
+/** @dir src/core/CL/kernels
* @brief All the OpenCL kernels
*/
diff --git a/examples/cl_cache.cpp b/examples/cl_cache.cpp
index 37e1c270d7..6de62f7c5d 100644
--- a/examples/cl_cache.cpp
+++ b/examples/cl_cache.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/CLFunctions.h"
-
+#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/Utils.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "utils/Utils.h"
using namespace arm_compute;
diff --git a/examples/cl_convolution.cpp b/examples/cl_convolution.cpp
index 34b3466f77..bfa53f3379 100644
--- a/examples/cl_convolution.cpp
+++ b/examples/cl_convolution.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,10 @@
#error "This example needs to be built with -DARM_COMPUTE_CL"
#endif /* ARM_COMPUTE_CL */
+#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "utils/ImageLoader.h"
#include "utils/Utils.h"
diff --git a/examples/cl_events.cpp b/examples/cl_events.cpp
index f578180869..27c063cbc9 100644
--- a/examples/cl_events.cpp
+++ b/examples/cl_events.cpp
@@ -26,8 +26,10 @@
#endif /* ARM_COMPUTE_CL */
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
#include "utils/ImageLoader.h"
#include "utils/Utils.h"
diff --git a/examples/cl_sgemm.cpp b/examples/cl_sgemm.cpp
index 7d3b4fe97f..27af228954 100644
--- a/examples/cl_sgemm.cpp
+++ b/examples/cl_sgemm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,9 +26,9 @@
#endif /* ARM_COMPUTE_CL */
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "utils/Utils.h"
#include <cstdlib>
diff --git a/examples/gemm_tuner/cl_gemm_native.cpp b/examples/gemm_tuner/cl_gemm_native.cpp
index 43035082a4..02f144ea12 100644
--- a/examples/gemm_tuner/cl_gemm_native.cpp
+++ b/examples/gemm_tuner/cl_gemm_native.cpp
@@ -26,14 +26,13 @@
#endif /* ARM_COMPUTE_CL */
#include "CommonGemmExampleOptions.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "tests/CL/Helper.h"
#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
diff --git a/examples/gemm_tuner/cl_gemm_reshaped.cpp b/examples/gemm_tuner/cl_gemm_reshaped.cpp
index 92fa990c87..a4d6203d5c 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped.cpp
@@ -25,17 +25,16 @@
#error "This example needs to be built with -DARM_COMPUTE_CL"
#endif /* ARM_COMPUTE_CL */
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
#include "examples/gemm_tuner/CommonGemmExampleOptions.h"
#include "examples/gemm_tuner/GemmTunerHelpers.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "tests/CL/Helper.h"
#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
diff --git a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
index 3a760018e1..cf65d0dd33 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
@@ -27,14 +27,13 @@
#include "CommonGemmExampleOptions.h"
#include "GemmTunerHelpers.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "tests/CL/Helper.h"
#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
diff --git a/examples/neoncl_scale_median_gaussian.cpp b/examples/neoncl_scale_median_gaussian.cpp
index df0eb9620f..948aff23bb 100644
--- a/examples/neoncl_scale_median_gaussian.cpp
+++ b/examples/neoncl_scale_median_gaussian.cpp
@@ -26,8 +26,9 @@
#endif /* ARM_COMPUTE_CL */
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "utils/ImageLoader.h"
#include "utils/Utils.h"
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
new file mode 100644
index 0000000000..282cc96dd8
--- /dev/null
+++ b/src/core/CL/CLKernels.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLKERNELS_H
+#define ARM_COMPUTE_CLKERNELS_H
+
+/* Header regrouping all the CL kernels */
+#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
+#include "src/core/CL/kernels/CLAccumulateKernel.h"
+#include "src/core/CL/kernels/CLActivationLayerKernel.h"
+#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "src/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "src/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "src/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLBox3x3Kernel.h"
+#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "src/core/CL/kernels/CLChannelCombineKernel.h"
+#include "src/core/CL/kernels/CLChannelExtractKernel.h"
+#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLColorConvertKernel.h"
+#include "src/core/CL/kernels/CLComparisonKernel.h"
+#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLCropKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
+#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLDerivativeKernel.h"
+#include "src/core/CL/kernels/CLDilateKernel.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
+#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/CL/kernels/CLErodeKernel.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
+#include "src/core/CL/kernels/CLFastCornersKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
+#include "src/core/CL/kernels/CLFloorKernel.h"
+#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
+#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLIntegralImageKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLLKTrackerKernel.h"
+#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
+#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
+#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "src/core/CL/kernels/CLPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLRangeKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "src/core/CL/kernels/CLRemapKernel.h"
+#include "src/core/CL/kernels/CLReorgLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReverseKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
+#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
+#include "src/core/CL/kernels/CLSelectKernel.h"
+#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
+#include "src/core/CL/kernels/CLStackLayerKernel.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
+#include "src/core/CL/kernels/CLThresholdKernel.h"
+#include "src/core/CL/kernels/CLTileKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
+#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"
+#include "src/core/CL/kernels/CLWarpAffineKernel.h"
+#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
+#include "src/core/CL/kernels/CLYOLOLayerKernel.h"
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+
+#endif /* ARM_COMPUTE_CLKERNELS_H */
diff --git a/src/core/CL/CLTracePoint.cpp b/src/core/CL/CLTracePoint.cpp
index 631cb84878..d603f40c26 100644
--- a/src/core/CL/CLTracePoint.cpp
+++ b/src/core/CL/CLTracePoint.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/core/TracePoint.h"
+#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLDistribution1D.h"
#include "arm_compute/core/CL/ICLHOG.h"
@@ -30,7 +31,6 @@
#include "arm_compute/core/CL/ICLMultiHOG.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
#include "utils/TypePrinter.h"
#include <vector>
diff --git a/src/core/CL/CLValidate.h b/src/core/CL/CLValidate.h
index cbbdf2d9d2..7b5294e452 100644
--- a/src/core/CL/CLValidate.h
+++ b/src/core/CL/CLValidate.h
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_CL_VALIDATE_H
#define ARM_COMPUTE_CL_VALIDATE_H
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Validate.h"
namespace arm_compute
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp
index f91510b4a7..2b259bf28a 100644
--- a/src/core/CL/ICLKernel.cpp
+++ b/src/core/CL/ICLKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
diff --git a/arm_compute/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h
index a24cd8c798..a24cd8c798 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/src/core/CL/ICLKernel.h
diff --git a/src/core/CL/ICLSimple2DKernel.cpp b/src/core/CL/ICLSimple2DKernel.cpp
index dfef5822b2..5d8295bdfe 100644
--- a/src/core/CL/ICLSimple2DKernel.cpp
+++ b/src/core/CL/ICLSimple2DKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
#include "src/core/helpers/WindowHelpers.h"
diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/src/core/CL/ICLSimple2DKernel.h
index 86561cd562..5246492401 100644
--- a/arm_compute/core/CL/ICLSimple2DKernel.h
+++ b/src/core/CL/ICLSimple2DKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
#define ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
+#include "src/core/CL/ICLSimpleKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/ICLSimple3DKernel.cpp b/src/core/CL/ICLSimple3DKernel.cpp
index 3d08262b5f..fef1a86125 100644
--- a/src/core/CL/ICLSimple3DKernel.cpp
+++ b/src/core/CL/ICLSimple3DKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/src/core/CL/ICLSimple3DKernel.h
index 3b4eaf7350..ff0b274663 100644
--- a/arm_compute/core/CL/ICLSimple3DKernel.h
+++ b/src/core/CL/ICLSimple3DKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
#define ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/ICLSimpleKernel.cpp b/src/core/CL/ICLSimpleKernel.cpp
index 90b5be8069..d67fefdf71 100644
--- a/src/core/CL/ICLSimpleKernel.cpp
+++ b/src/core/CL/ICLSimpleKernel.cpp
@@ -21,8 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
+#include "src/core/CL/ICLSimpleKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/src/core/CL/ICLSimpleKernel.h
index 805342f830..b35547a217 100644
--- a/arm_compute/core/CL/ICLSimpleKernel.h
+++ b/src/core/CL/ICLSimpleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_ICLSIMPLEKERNEL_H
#define ARM_COMPUTE_ICLSIMPLEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
index 29745beee7..76b60cb9f8 100644
--- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
+++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
@@ -21,14 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+
+#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
#include "src/core/helpers/WindowHelpers.h"
#include <set>
diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h
index f62855cbb9..28f28fe44f 100644
--- a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
+++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLAccumulateKernel.cpp b/src/core/CL/kernels/CLAccumulateKernel.cpp
index f161906646..b0a8eba644 100644
--- a/src/core/CL/kernels/CLAccumulateKernel.cpp
+++ b/src/core/CL/kernels/CLAccumulateKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
+#include "src/core/CL/kernels/CLAccumulateKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/src/core/CL/kernels/CLAccumulateKernel.h
index e067da084f..16a715319d 100644
--- a/arm_compute/core/CL/kernels/CLAccumulateKernel.h
+++ b/src/core/CL/kernels/CLAccumulateKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H
#define ARM_COMPUTE_CLACCUMULATEKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index f0e3047796..8ddf8d8f9e 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
+#include "src/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/src/core/CL/kernels/CLActivationLayerKernel.h
index 81d4ccb065..821418f835 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/src/core/CL/kernels/CLActivationLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
index b5a801a97f..0e6fc6599c 100644
--- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h
index 48876c0b56..929677f905 100644
--- a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
index 7a8c9ad0fb..7e9424f58b 100644
--- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h
index bb8968ca83..54a89eb243 100644
--- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
+++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h
@@ -25,8 +25,8 @@
#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 09b668d6cd..9aeca3bcfe 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index c556a0c6f4..743f4a9594 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
index e5997fb4d2..da41feb7b8 100644
--- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h
index 7af88d8986..131a43e59c 100644
--- a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
+++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
index 53a438dcf6..91a659284a 100644
--- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "src/core/CL/kernels/CLBitwiseAndKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/src/core/CL/kernels/CLBitwiseAndKernel.h
index e291f08b9a..01018ee09d 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
+++ b/src/core/CL/kernels/CLBitwiseAndKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H
#define ARM_COMPUTE_CLBITWISEANDKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBitwiseNotKernel.cpp b/src/core/CL/kernels/CLBitwiseNotKernel.cpp
index 08e4c54957..118bfe8139 100644
--- a/src/core/CL/kernels/CLBitwiseNotKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseNotKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "src/core/CL/kernels/CLBitwiseNotKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/src/core/CL/kernels/CLBitwiseNotKernel.h
index f57bbf4778..bf68bc7ae5 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
+++ b/src/core/CL/kernels/CLBitwiseNotKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H
#define ARM_COMPUTE_CLBITWISENOTKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
index 0e2e5d4f3c..8954d9aa6d 100644
--- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "src/core/CL/kernels/CLBitwiseOrKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/src/core/CL/kernels/CLBitwiseOrKernel.h
index 944224ecb9..c27d0c27e2 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
+++ b/src/core/CL/kernels/CLBitwiseOrKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H
#define ARM_COMPUTE_CLBITWISEORKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
index 65b17c02bd..69eb38e2e6 100644
--- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "src/core/CL/kernels/CLBitwiseXorKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/src/core/CL/kernels/CLBitwiseXorKernel.h
index 732ae8659e..b4861ea757 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
+++ b/src/core/CL/kernels/CLBitwiseXorKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H
#define ARM_COMPUTE_CLBITWISEXORKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
index b8c0d2f2b8..bcfd9b8e5a 100644
--- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
+++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h
index 4e8c5a6f18..08f350e86a 100644
--- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h
+++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp
index 2f6c09df0b..9f493b4fb8 100644
--- a/src/core/CL/kernels/CLBox3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
+#include "src/core/CL/kernels/CLBox3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/src/core/CL/kernels/CLBox3x3Kernel.h
index 1a8572dd68..2373c4a928 100644
--- a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
+++ b/src/core/CL/kernels/CLBox3x3Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H
#define ARM_COMPUTE_CLBOX3X3KERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
index c76ec6769e..1fe944c8a2 100644
--- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp
+++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/src/core/CL/kernels/CLCannyEdgeKernel.h
index c4d0297aef..7543822d8d 100644
--- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
+++ b/src/core/CL/kernels/CLCannyEdgeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H
#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp
index d574f352ae..52ba9dd065 100644
--- a/src/core/CL/kernels/CLChannelCombineKernel.cpp
+++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
+#include "src/core/CL/kernels/CLChannelCombineKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/src/core/CL/kernels/CLChannelCombineKernel.h
index f9c33df7c1..f19995aa8e 100644
--- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
+++ b/src/core/CL/kernels/CLChannelCombineKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include <array>
#include <cstdint>
diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp
index 7911b948ae..cbf504b98b 100644
--- a/src/core/CL/kernels/CLChannelExtractKernel.cpp
+++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
+#include "src/core/CL/kernels/CLChannelExtractKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/src/core/CL/kernels/CLChannelExtractKernel.h
index 1ccf38bb8c..37abde548c 100644
--- a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
+++ b/src/core/CL/kernels/CLChannelExtractKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
index 301a762850..c969792c3e 100644
--- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
+#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h
index bf58525248..31c007f17e 100644
--- a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 3dc007d9e0..44b8471725 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/src/core/CL/kernels/CLCol2ImKernel.h
index c3a1ff3a50..710e048bca 100644
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ b/src/core/CL/kernels/CLCol2ImKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H
#define ARM_COMPUTE_CLCOL2IMKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index 0f82d87348..6c61fec997 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
+#include "src/core/CL/kernels/CLColorConvertKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/src/core/CL/kernels/CLColorConvertKernel.h
index d57bb3de03..0f082914cd 100644
--- a/arm_compute/core/CL/kernels/CLColorConvertKernel.h
+++ b/src/core/CL/kernels/CLColorConvertKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp
index 2b72946f49..e2aee36bd8 100644
--- a/src/core/CL/kernels/CLComparisonKernel.cpp
+++ b/src/core/CL/kernels/CLComparisonKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
+#include "src/core/CL/kernels/CLComparisonKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/src/core/CL/kernels/CLComparisonKernel.h
index bbf5f19e2f..0b94190183 100644
--- a/arm_compute/core/CL/kernels/CLComparisonKernel.h
+++ b/src/core/CL/kernels/CLComparisonKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H
#define ARM_COMPUTE_CLCOMPARISONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index c7888c9c76..dcf4e6662e 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
index 5d9e9bdd85..d1da793df2 100644
--- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp
index 48b185f78d..21f1047cc6 100644
--- a/src/core/CL/kernels/CLConvolutionKernel.cpp
+++ b/src/core/CL/kernels/CLConvolutionKernel.cpp
@@ -21,11 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
@@ -33,6 +32,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/src/core/CL/kernels/CLConvolutionKernel.h
index 0f500fb63a..33e73caf11 100644
--- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h
+++ b/src/core/CL/kernels/CLConvolutionKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp
index 184b80caa8..ca38b65df4 100644
--- a/src/core/CL/kernels/CLCopyKernel.cpp
+++ b/src/core/CL/kernels/CLCopyKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/src/core/CL/kernels/CLCopyKernel.h
index 5c91e27935..9a20b88884 100644
--- a/arm_compute/core/CL/kernels/CLCopyKernel.h
+++ b/src/core/CL/kernels/CLCopyKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLCOPYKERNEL_H
#define ARM_COMPUTE_CLCOPYKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp
index 2c99d46929..9cf15ff93b 100644
--- a/src/core/CL/kernels/CLCropKernel.cpp
+++ b/src/core/CL/kernels/CLCropKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
+#include "src/core/CL/kernels/CLCropKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLCropKernel.h b/src/core/CL/kernels/CLCropKernel.h
index 91d70e6c1b..cbfada58ab 100644
--- a/arm_compute/core/CL/kernels/CLCropKernel.h
+++ b/src/core/CL/kernels/CLCropKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLCROPKERNEL_H
#define ARM_COMPUTE_CLCROPKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index 9ba3dc3d8f..d01a00d61e 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
index 84265a2329..e0d1322341 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
index 1514d906dc..ea22ec0067 100644
--- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
index 688c943593..ce354fa86f 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
+++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
+#include "src/core/CL/ICLSimpleKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index cb5d727e9b..78adfd202f 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h
index d8493bc5d8..6c73bd4bf4 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -25,8 +25,8 @@
#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index 452a14bd29..c98d66f390 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/src/core/CL/kernels/CLDepthConvertLayerKernel.h
index 7f9696d835..8b511c6707 100644
--- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
index c3a40a286a..8946f2a713 100644
--- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h
index 1bd1e8e763..1f7f77b569 100644
--- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index 7958230aac..c928677f30 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -21,11 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
@@ -34,6 +33,7 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
index 93e7e374b0..45b5869676 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 876ef1ec5d..0b673ccdba 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -21,11 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
@@ -34,6 +33,7 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
index db57439de0..ce0bf5ceb3 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index 4580968d38..748f4a3848 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -21,11 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
@@ -33,6 +32,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
index 03a0106cc9..325f4e7067 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
index 0ff3c520ba..b10c23bde9 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
@@ -21,11 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
@@ -33,6 +32,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
index 51aaf17600..650fe9a11b 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index e2c49fbf66..3723c651fe 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/src/core/CL/kernels/CLDequantizationLayerKernel.h
index 7a582da132..5579b5bc71 100644
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp
index 659a7cb209..5ff11362cc 100644
--- a/src/core/CL/kernels/CLDerivativeKernel.cpp
+++ b/src/core/CL/kernels/CLDerivativeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
+#include "src/core/CL/kernels/CLDerivativeKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/src/core/CL/kernels/CLDerivativeKernel.h
index b49905a5e6..14dd05d084 100644
--- a/arm_compute/core/CL/kernels/CLDerivativeKernel.h
+++ b/src/core/CL/kernels/CLDerivativeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H
#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp
index 1e59c349e7..cac5bc1c72 100644
--- a/src/core/CL/kernels/CLDilateKernel.cpp
+++ b/src/core/CL/kernels/CLDilateKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
+#include "src/core/CL/kernels/CLDilateKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/src/core/CL/kernels/CLDilateKernel.h
index 747f8fa5ca..591ec8ccfc 100644
--- a/arm_compute/core/CL/kernels/CLDilateKernel.h
+++ b/src/core/CL/kernels/CLDilateKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDILATEKERNEL_H
#define ARM_COMPUTE_CLDILATEKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 161b221e81..a642eabc4e 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index 5281a0c306..5cd674f631 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
index bff0db07a0..38a7f1bae1 100644
--- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
+++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
+#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
index 82cd953b68..95b5872796 100644
--- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
+++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLSimpleKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index da28f3d886..896ee119c1 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/src/core/CL/kernels/CLElementwiseOperationKernel.h
index 80737cb8eb..75030cf3a3 100644
--- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp
index 29a32979a3..f6d98a5488 100644
--- a/src/core/CL/kernels/CLErodeKernel.cpp
+++ b/src/core/CL/kernels/CLErodeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
+#include "src/core/CL/kernels/CLErodeKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/src/core/CL/kernels/CLErodeKernel.h
index 620201d625..4da97ae358 100644
--- a/arm_compute/core/CL/kernels/CLErodeKernel.h
+++ b/src/core/CL/kernels/CLErodeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLERODEKERNEL_H
#define ARM_COMPUTE_CLERODEKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
index 0478f550f9..922e50aa73 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
index a196c8c64f..2e2f1bdff4 100644
--- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
index 7b17a227e1..0f06640b64 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/src/core/CL/kernels/CLFFTRadixStageKernel.h
index d6d6067bc4..c3cc510bdd 100644
--- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp
index 49fcbb6c7b..4dbe8d2e86 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.cpp
+++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/src/core/CL/kernels/CLFFTScaleKernel.h
index c6dd176f58..cb007e5307 100644
--- a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
+++ b/src/core/CL/kernels/CLFFTScaleKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H
#define ARM_COMPUTE_CLFFTSCALEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp
index ebdfd2741f..7481fd1c27 100644
--- a/src/core/CL/kernels/CLFastCornersKernel.cpp
+++ b/src/core/CL/kernels/CLFastCornersKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
+#include "src/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/src/core/CL/kernels/CLFastCornersKernel.h
index 5d0da7d5d5..0c1b564c2f 100644
--- a/arm_compute/core/CL/kernels/CLFastCornersKernel.h
+++ b/src/core/CL/kernels/CLFastCornersKernel.h
@@ -25,8 +25,8 @@
#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index e92619a242..5d77c291d7 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/src/core/CL/kernels/CLFillBorderKernel.h
index 5323af4c0e..7951f48171 100644
--- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h
+++ b/src/core/CL/kernels/CLFillBorderKernel.h
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H
#define ARM_COMPUTE_CLFILLBORDERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
index 590fcee6fd..b3f84b6928 100644
--- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp
+++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
+#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
diff --git a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h b/src/core/CL/kernels/CLFlattenLayerKernel.h
index 4df0b33c8e..2471cf2e4a 100644
--- a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h
+++ b/src/core/CL/kernels/CLFlattenLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp
index 8884f3fe36..2af0089bf0 100644
--- a/src/core/CL/kernels/CLFloorKernel.cpp
+++ b/src/core/CL/kernels/CLFloorKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
+#include "src/core/CL/kernels/CLFloorKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLFloorKernel.h b/src/core/CL/kernels/CLFloorKernel.h
index 3b1d3f10cc..f5635141e4 100644
--- a/arm_compute/core/CL/kernels/CLFloorKernel.h
+++ b/src/core/CL/kernels/CLFloorKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLFLOORKERNEL_H
#define ARM_COMPUTE_CLFLOORKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
index 357231940b..2116239080 100644
--- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
+#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h
index 3ec251c858..78b1e74cab 100644
--- a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h
+++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
index af7755b4e4..1f89865908 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
index 2d5e4a3346..125f0c6948 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
index 713d822f9b..ded4b29ae7 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
index f2eb447834..100100b1b1 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 33fb903813..95aa30d14a 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -195,11 +195,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
{
ARM_COMPUTE_UNUSED(vector_sum_row, vector_sum_col, output_multipliers, bias, output_shifts);
- const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage;
- unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
- unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d;
- bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0);
+ const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage;
+ unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
+ unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d;
+ bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0);
Window win{};
Window win_out{};
@@ -297,7 +297,7 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileCon
output_multipliers != nullptr ? output_multipliers->info() : nullptr,
output_shifts != nullptr ? output_shifts->info() : nullptr));
- auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts });
+ auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts });
const GEMMRHSMatrixInfo rhs_info = gemm_info.rhs_info;
const GEMMLHSMatrixInfo lhs_info = gemm_info.lhs_info;
const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage;
@@ -349,7 +349,7 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileCon
// we will dispatch a batched-GEMM to reduce the complexity of the address calculation within the OpenCL kernel.
// This means that the actual m used by the kernel is given by output->info()->dimension(1) and not by gemm_info.m
const unsigned int internal_m = _reinterpret_output_as_3d ? gemm_info.m : output->info()->dimension(1);
- // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding.
+ // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding.
const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
index a2295143de..222a8615e4 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
index aa4eea60ca..c7844b9c28 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
index 1d3b3110b3..f8705595a0 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
index afa7bdbfdf..b41d8704bd 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
index e3f88c11e6..15f54d17a5 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
@@ -91,7 +91,8 @@ public:
* @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
* Supported data types: S32
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k,
+ void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output,
+ int32_t k,
int32_t a_offset, int32_t b_offset,
const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp
index ab1b5a2203..d0f016879e 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h
index 23040e7bcc..8653102cd8 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp
index ad5bac015b..1d29dfe4b3 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
index 0b3f23dab3..0a8d5e1942 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
index 8e4b291dbe..d32d328fc2 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
index 1a284f0701..abdf33ea43 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
index 339049ff9a..d508bf6f21 100644
--- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/src/core/CL/kernels/CLGEMMLowpReductionKernel.h
index 6066e2a815..237d8099b7 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
#define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index fd0978230d..b0d08a756c 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
index 4abd60c202..71d223b8ac 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
index cea147b10c..f613937f54 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
index 006b2bf91f..6b6004b464 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
index eaf57086a3..fb15b42fe2 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
index 962645749e..2ffc322def 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
index d53aede3c8..1f296f8e26 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
@@ -356,10 +356,10 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl::Co
ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
- const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y? 3u : 2u;
+ const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u;
const size_t rhs_idx_batch_size = 2u;
const size_t bia_idx_batch_size = 2u;
- const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y? 3u : 2u;
+ const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y ? 3u : 2u;
Window slice = window.first_slice_window_3D();
Window slice_matrix_b = slice;
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
index eab7fd219e..5b96679a46 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 04aa061e98..ee0abc56d3 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
index 95ed87d95b..bef8c231ac 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp
index f2ad677976..3e2fc79704 100644
--- a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
index 0f74cb85e4..92202a26fc 100644
--- a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
+++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
index d94e834d2c..33de61ed01 100644
--- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
index 557f71b07d..911484ea76 100644
--- a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
+++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
@@ -46,6 +46,8 @@ public:
CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default;
/** Allow instances of this class to be moved */
CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default;
+ /** Default destructor */
+ ~CLGEMMReshapeRHSMatrixKernel() = default;
/** Initialise the kernel's input and output.
*
* @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor,
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp
index a8508bed2d..9e802c20fb 100644
--- a/src/core/CL/kernels/CLGatherKernel.cpp
+++ b/src/core/CL/kernels/CLGatherKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/AutoConfiguration.h"
diff --git a/arm_compute/core/CL/kernels/CLGatherKernel.h b/src/core/CL/kernels/CLGatherKernel.h
index c8a96327b6..8f472a4696 100644
--- a/arm_compute/core/CL/kernels/CLGatherKernel.h
+++ b/src/core/CL/kernels/CLGatherKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLGATHERKERNEL_H
#define ARM_COMPUTE_CLGATHERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
index c9ed1ac0d7..40e9658ab4 100644
--- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
+#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/src/core/CL/kernels/CLGaussian3x3Kernel.h
index a783527de4..139b05d44c 100644
--- a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
+++ b/src/core/CL/kernels/CLGaussian3x3Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
index 5b3639f025..46a7576154 100644
--- a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
+++ b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
#include <cstdint>
diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/src/core/CL/kernels/CLGaussian5x5Kernel.h
index e8c2268e26..711710b3b3 100644
--- a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
+++ b/src/core/CL/kernels/CLGaussian5x5Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
index 2686e8b32e..065f7f7e92 100644
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
+++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/src/core/CL/kernels/CLGaussianPyramidKernel.h
index 36e095d4d1..a6595440f6 100644
--- a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
+++ b/src/core/CL/kernels/CLGaussianPyramidKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
+#include "src/core/CL/ICLSimpleKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
index a2fcbbab78..dd3faf50a2 100644
--- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
+++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h
index 9dfe4a42ce..d26795ac7d 100644
--- a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h
+++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
class ICLTensor;
diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
index eaf5ea4880..cd3f1ee216 100644
--- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/src/core/CL/kernels/CLHOGDescriptorKernel.h
index c001aa2c9f..eee2fa36bc 100644
--- a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
+++ b/src/core/CL/kernels/CLHOGDescriptorKernel.h
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/IHOG.h"
#include "arm_compute/core/Size2D.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
index 6e14996732..861155b9a2 100644
--- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/src/core/CL/kernels/CLHOGDetectorKernel.h
index dc9bba8f20..c28e6ebe74 100644
--- a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
+++ b/src/core/CL/kernels/CLHOGDetectorKernel.h
@@ -26,8 +26,8 @@
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLHOG.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/OpenCL.h"
+#include "src/core/CL/ICLKernel.h"
namespace cl
{
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
index 19c4e579a0..cbc056fb77 100644
--- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp
+++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/src/core/CL/kernels/CLHarrisCornersKernel.h
index 38a2f04adf..6482b0aa4e 100644
--- a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
+++ b/src/core/CL/kernels/CLHarrisCornersKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
index 3f5e91e5a1..8aa7366d50 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h
index f362441944..f4cb627052 100644
--- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h
@@ -25,8 +25,8 @@
#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp
index a85429c1a0..ca5322aa51 100644
--- a/src/core/CL/kernels/CLHistogramKernel.cpp
+++ b/src/core/CL/kernels/CLHistogramKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLDistribution1D.h"
diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/src/core/CL/kernels/CLHistogramKernel.h
index 7cb79db6e9..9c97c6590d 100644
--- a/arm_compute/core/CL/kernels/CLHistogramKernel.h
+++ b/src/core/CL/kernels/CLHistogramKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H
#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 76490f82f6..0789cdc8a7 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/src/core/CL/kernels/CLIm2ColKernel.h
index 7b7bd03108..2920c7d138 100644
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ b/src/core/CL/kernels/CLIm2ColKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H
#define ARM_COMPUTE_CLIM2COLKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Size2D.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
index e97b856456..4c3b404be7 100644
--- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
index a3fdd3c4e7..d4444f0b20 100644
--- a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
+++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp
index 82f6da85a5..5e5683d231 100644
--- a/src/core/CL/kernels/CLIntegralImageKernel.cpp
+++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
+#include "src/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/src/core/CL/kernels/CLIntegralImageKernel.h
index cef699ab54..0e40e3afbc 100644
--- a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
+++ b/src/core/CL/kernels/CLIntegralImageKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 9936e29c5f..9e91d98f7c 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h
index 55fe563954..edc0585217 100644
--- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp
index 0fa2e703ec..a439c2448e 100644
--- a/src/core/CL/kernels/CLLKTrackerKernel.cpp
+++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
+#include "src/core/CL/kernels/CLLKTrackerKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLArray.h"
diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/src/core/CL/kernels/CLLKTrackerKernel.h
index fdc2ef8333..2d2966854a 100644
--- a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
+++ b/src/core/CL/kernels/CLLKTrackerKernel.h
@@ -25,8 +25,8 @@
#define ARM_COMPUTE_CLLKTRACKERKERNEL_H
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
#include <cstddef>
#include <cstdint>
@@ -35,40 +35,6 @@ namespace arm_compute
{
class ICLTensor;
-/** Internal keypoint structure for Lucas-Kanade Optical Flow */
-struct CLLKInternalKeypoint
-{
- float x{ 0.f }; /**< x coordinate of the keypoint */
- float y{ 0.f }; /**< y coordinate of the keypoint */
- float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
- float dummy{ 0.f }; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
-};
-
-/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
-struct CLCoefficientTable
-{
- float A11; /**< iA11 * FLT_SCALE */
- float A12; /**< iA11 * FLT_SCALE */
- float A22; /**< iA11 * FLT_SCALE */
- float min_eig; /**< Minimum eigenvalue */
-};
-
-/** Structure for storing ival, ixval and iyval for each point inside the window */
-struct CLOldValue
-{
- int16_t ival; /**< ival extracts from old image */
- int16_t ixval; /**< ixval extracts from scharr Gx image */
- int16_t iyval; /**< iyval extracts from scharr Gy image */
- int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
-};
-
-/** Interface for OpenCL Array of Internal Key Points. */
-using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
-/** Interface for OpenCL Array of Coefficient Tables. */
-using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
-/** Interface for OpenCL Array of Old Values. */
-using ICLOldValArray = ICLArray<CLOldValue>;
-
/** Interface to run the initialization step of LKTracker */
class CLLKTrackerInitKernel : public ICLKernel
{
diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
index 6e4c45eab7..49e04c32c2 100644
--- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
index d5653f83ea..5d0a22afa5 100644
--- a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
+++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
index dc130d0ff9..9845dd6169 100644
--- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
+++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/src/core/CL/kernels/CLMagnitudePhaseKernel.h
index a741b1745a..514036b2ff 100644
--- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
+++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
index a78996ddae..2a1312af94 100644
--- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h
index 9d51f6b59c..86267ec0f7 100644
--- a/arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h
+++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H
#define ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index 5acc3ac3d6..aed6e6eaf7 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/src/core/CL/kernels/CLMeanStdDevKernel.h
index 2a5a5f2e33..179a2025b7 100644
--- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace cl
{
diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
index 82a22a9f19..a889df7930 100644
--- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
index ff5e9ab0f7..a1ba2b905e 100644
--- a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
+++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
index 4b899502f9..23a21d6b19 100644
--- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
+#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/src/core/CL/kernels/CLMedian3x3Kernel.h
index ccb475360f..8cc5ed7279 100644
--- a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
+++ b/src/core/CL/kernels/CLMedian3x3Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp
index 186ed2a38c..2543b07a1a 100644
--- a/src/core/CL/kernels/CLMemsetKernel.cpp
+++ b/src/core/CL/kernels/CLMemsetKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/WindowHelpers.h"
diff --git a/arm_compute/core/CL/kernels/CLMemsetKernel.h b/src/core/CL/kernels/CLMemsetKernel.h
index 5bda480306..dc103f580f 100644
--- a/arm_compute/core/CL/kernels/CLMemsetKernel.h
+++ b/src/core/CL/kernels/CLMemsetKernel.h
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
#define ARM_COMPUTE_CLMEMSETKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
index bf645f82e9..7017efa3c2 100644
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h b/src/core/CL/kernels/CLMinMaxLayerKernel.h
index a693cfdb27..aa2ff3f375 100644
--- a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h
+++ b/src/core/CL/kernels/CLMinMaxLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
index 634b58077a..675cfc19a9 100644
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/src/core/CL/kernels/CLMinMaxLocationKernel.h
index fbcf69752c..2196abe033 100644
--- a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.h
@@ -25,7 +25,7 @@
#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include <array>
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
index 0a8472bf04..c73acaf1d8 100644
--- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
+++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/src/core/CL/kernels/CLNonLinearFilterKernel.h
index cee64480b6..ed42063d2b 100644
--- a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
+++ b/src/core/CL/kernels/CLNonLinearFilterKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
index 9c6d44b6c5..7d5c5ba7e1 100644
--- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
index d1bba4f480..d9ed60ce6b 100644
--- a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
+++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index 686e6f1b26..d1982e77b9 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/src/core/CL/kernels/CLNormalizationLayerKernel.h
index 6233d28b0a..739a2ae9f1 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
index 407ce6626b..18cbe217be 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
index 2e2e60df0b..6db4433e78 100644
--- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index 45729738fb..485676667c 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/src/core/CL/kernels/CLPadLayerKernel.h
index 5bf5841803..2b0abb18df 100644
--- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ b/src/core/CL/kernels/CLPadLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H
#define ARM_COMPUTE_CLPADLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp
index 620665791f..4d289f28e6 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/CL/kernels/CLPermuteKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/AutoConfiguration.h"
diff --git a/arm_compute/core/CL/kernels/CLPermuteKernel.h b/src/core/CL/kernels/CLPermuteKernel.h
index bb841b1c83..d1bb875d7a 100644
--- a/arm_compute/core/CL/kernels/CLPermuteKernel.h
+++ b/src/core/CL/kernels/CLPermuteKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H
#define ARM_COMPUTE_CLPERMUTEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index a7bd4dad60..a6255f8018 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index 6b5bd11bde..0cc4005875 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 0570887b91..905610c31f 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -21,11 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLPoolingLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
@@ -34,6 +33,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -176,7 +176,7 @@ std::tuple<Status, Window, CLPoolingConfig> validate_and_configure_window(ITenso
case DataLayout::NHWC:
{
// Initialize border size
- border_size = BorderSize();
+ border_size = BorderSize();
num_elems_processed_per_iteration = adjust_vec_size(4, output->dimension(0));
win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/src/core/CL/kernels/CLPoolingLayerKernel.h
index 85585e4587..d88402a792 100644
--- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/Error.h"
diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
index 202e9fbb37..7b9caf0063 100644
--- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
+#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h b/src/core/CL/kernels/CLPriorBoxLayerKernel.h
index b4a69ac496..6c369a7a4e 100644
--- a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h
+++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
index ff6cc86103..3a66d084b9 100644
--- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/helpers/AutoConfiguration.h"
diff --git a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
index 51c50bc011..31085c37ba 100644
--- a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
+++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H
#define ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index 44889b9407..76e703f0dd 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h b/src/core/CL/kernels/CLQuantizationLayerKernel.h
index b0144bf8b0..e9d03decb3 100644
--- a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
index ca6c6fad1a..38eafc6e97 100644
--- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h b/src/core/CL/kernels/CLROIAlignLayerKernel.h
index 6a0468d331..cbf0e00165 100644
--- a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.h
@@ -25,7 +25,7 @@
#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index 55fe5a5321..43492a3d50 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/src/core/CL/kernels/CLROIPoolingLayerKernel.h
index ee422e10ee..35f42a9676 100644
--- a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLArray.h"
diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp
index a4c30b63c2..892f1c7c9f 100644
--- a/src/core/CL/kernels/CLRangeKernel.cpp
+++ b/src/core/CL/kernels/CLRangeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
+#include "src/core/CL/kernels/CLRangeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLRangeKernel.h b/src/core/CL/kernels/CLRangeKernel.h
index b5c64b2480..1b94a099ed 100644
--- a/arm_compute/core/CL/kernels/CLRangeKernel.h
+++ b/src/core/CL/kernels/CLRangeKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLRANGEKERNEL_H
#define ARM_COMPUTE_CLRANGEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 325e4b994c..9d49a2193a 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/src/core/CL/kernels/CLReductionOperationKernel.h
index 2ecd1c9fd4..ff9fd61484 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/src/core/CL/kernels/CLReductionOperationKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp
index 8d3f41b35f..0ebeefcc74 100644
--- a/src/core/CL/kernels/CLRemapKernel.cpp
+++ b/src/core/CL/kernels/CLRemapKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
+#include "src/core/CL/kernels/CLRemapKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/src/core/CL/kernels/CLRemapKernel.h
index fd261cd465..8efcf091ed 100644
--- a/arm_compute/core/CL/kernels/CLRemapKernel.h
+++ b/src/core/CL/kernels/CLRemapKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLREMAPKERNEL_H
#define ARM_COMPUTE_CLREMAPKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp
index ade7761b91..662c790ca2 100644
--- a/src/core/CL/kernels/CLReorgLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h"
+#include "src/core/CL/kernels/CLReorgLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/src/core/CL/kernels/CLReorgLayerKernel.h
index e3edc9f724..455a6170c6 100644
--- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
+++ b/src/core/CL/kernels/CLReorgLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H
#define ARM_COMPUTE_CLREORGLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
index b14013bc34..58d7843624 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/src/core/CL/kernels/CLReshapeLayerKernel.h
index 6e3f255c52..902c44649b 100644
--- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
+++ b/src/core/CL/kernels/CLReshapeLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp
index f8240984d1..9a876258e9 100644
--- a/src/core/CL/kernels/CLReverseKernel.cpp
+++ b/src/core/CL/kernels/CLReverseKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReverseKernel.h"
+#include "src/core/CL/kernels/CLReverseKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLReverseKernel.h b/src/core/CL/kernels/CLReverseKernel.h
index 17f1a4a20f..4a21e4f802 100644
--- a/arm_compute/core/CL/kernels/CLReverseKernel.h
+++ b/src/core/CL/kernels/CLReverseKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H
#define ARM_COMPUTE_CLREVERSEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index 8233f210b4..5a7d5830fd 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -21,11 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
@@ -33,6 +32,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/src/core/CL/kernels/CLScaleKernel.h
index 79f7ed181a..a72e3938d9 100644
--- a/arm_compute/core/CL/kernels/CLScaleKernel.h
+++ b/src/core/CL/kernels/CLScaleKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLSCALEKERNEL_H
#define ARM_COMPUTE_CLSCALEKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
index 1e33af3047..7ceddc9626 100644
--- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
+#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/src/core/CL/kernels/CLScharr3x3Kernel.h
index 1af56a764e..a670da5b6f 100644
--- a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
+++ b/src/core/CL/kernels/CLScharr3x3Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H
#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp
index d9a1044e1f..53e5414c88 100644
--- a/src/core/CL/kernels/CLSelectKernel.cpp
+++ b/src/core/CL/kernels/CLSelectKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSelectKernel.h"
+#include "src/core/CL/kernels/CLSelectKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLSelectKernel.h b/src/core/CL/kernels/CLSelectKernel.h
index 4015a273ea..93ae27f444 100644
--- a/arm_compute/core/CL/kernels/CLSelectKernel.h
+++ b/src/core/CL/kernels/CLSelectKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLSELECTKERNEL_H
#define ARM_COMPUTE_CLSELECTKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
index 89e5207c44..a87677a38f 100644
--- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
+#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/src/core/CL/kernels/CLSobel3x3Kernel.h
index e24767852e..fed8068762 100644
--- a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
+++ b/src/core/CL/kernels/CLSobel3x3Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H
#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
index 3e765e47fb..c450becd1d 100644
--- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/src/core/CL/kernels/CLSobel5x5Kernel.h
index 82831ed14d..a163ac932a 100644
--- a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
+++ b/src/core/CL/kernels/CLSobel5x5Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H
#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
index 37ceaba502..1cfa74f7b3 100644
--- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/src/core/CL/kernels/CLSobel7x7Kernel.h
index d55993d1f0..c85f0aedf9 100644
--- a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
+++ b/src/core/CL/kernels/CLSobel7x7Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H
#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index 5c0acda41a..d9f498c522 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/src/core/CL/kernels/CLSoftmaxLayerKernel.h
index f8c1019d53..29e0f63e46 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
index c6f70c3c09..91b889a10a 100644
--- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h
index 93221f7b5a..4819c80fce 100644
--- a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h
+++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
@@ -84,7 +84,8 @@ public:
* @param[in] padding_right The right padding of the output tensor.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+ ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
index 2d46aade34..1c648e0944 100644
--- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h
index af0aa12598..bb1ac5f9a6 100644
--- a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h
+++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp
index 5055065779..9bdcc8dc3f 100644
--- a/src/core/CL/kernels/CLStackLayerKernel.cpp
+++ b/src/core/CL/kernels/CLStackLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
+#include "src/core/CL/kernels/CLStackLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLStackLayerKernel.h b/src/core/CL/kernels/CLStackLayerKernel.h
index cfefcd97dd..2865127a90 100644
--- a/arm_compute/core/CL/kernels/CLStackLayerKernel.h
+++ b/src/core/CL/kernels/CLStackLayerKernel.h
@@ -25,8 +25,8 @@
#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H
#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index b632e05d84..c87fcb9765 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/src/core/CL/kernels/CLStridedSliceKernel.h
index 74311b71fa..599cf34c39 100644
--- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
+++ b/src/core/CL/kernels/CLStridedSliceKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
#include <cstdint>
diff --git a/src/core/CL/kernels/CLTableLookupKernel.cpp b/src/core/CL/kernels/CLTableLookupKernel.cpp
index 3b8ca60ab1..b82f4c9889 100644
--- a/src/core/CL/kernels/CLTableLookupKernel.cpp
+++ b/src/core/CL/kernels/CLTableLookupKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLLut.h"
diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/src/core/CL/kernels/CLTableLookupKernel.h
index 9f1d28c47a..c8d15cbee2 100644
--- a/arm_compute/core/CL/kernels/CLTableLookupKernel.h
+++ b/src/core/CL/kernels/CLTableLookupKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLThresholdKernel.cpp b/src/core/CL/kernels/CLThresholdKernel.cpp
index de81644edd..72c22f043c 100644
--- a/src/core/CL/kernels/CLThresholdKernel.cpp
+++ b/src/core/CL/kernels/CLThresholdKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
+#include "src/core/CL/kernels/CLThresholdKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/src/core/CL/kernels/CLThresholdKernel.h
index 7e01fd6aaa..511eaed1bf 100644
--- a/arm_compute/core/CL/kernels/CLThresholdKernel.h
+++ b/src/core/CL/kernels/CLThresholdKernel.h
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H
#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
index 43c8953363..c0c3d2e2ee 100644
--- a/src/core/CL/kernels/CLTileKernel.cpp
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
+#include "src/core/CL/kernels/CLTileKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/AutoConfiguration.h"
diff --git a/arm_compute/core/CL/kernels/CLTileKernel.h b/src/core/CL/kernels/CLTileKernel.h
index 56e1df8de3..41752ca90b 100644
--- a/arm_compute/core/CL/kernels/CLTileKernel.h
+++ b/src/core/CL/kernels/CLTileKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLTILEKERNEL_H
#define ARM_COMPUTE_CLTILEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp
index bd910196e9..8d967e901f 100644
--- a/src/core/CL/kernels/CLTransposeKernel.cpp
+++ b/src/core/CL/kernels/CLTransposeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/src/core/CL/kernels/CLTransposeKernel.h
index 4a9887f2cf..0c4b7b4aff 100644
--- a/arm_compute/core/CL/kernels/CLTransposeKernel.h
+++ b/src/core/CL/kernels/CLTransposeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H
#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
index a4fc10f26a..acb2fbcd04 100644
--- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
+#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/src/core/CL/kernels/CLUpsampleLayerKernel.h
index b523b97233..f90ee07bf4 100644
--- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
+++ b/src/core/CL/kernels/CLUpsampleLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp
index 95a7c1b875..600c67a528 100644
--- a/src/core/CL/kernels/CLWarpAffineKernel.cpp
+++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
+#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/src/core/CL/kernels/CLWarpAffineKernel.h
index 440febab96..c600ee780d 100644
--- a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
+++ b/src/core/CL/kernels/CLWarpAffineKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H
#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
index 2fe1feb485..5f20a0bdd3 100644
--- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
+++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
+#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/src/core/CL/kernels/CLWarpPerspectiveKernel.h
index 6614989059..dcbe1c5560 100644
--- a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
+++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index c06c2d3ec7..559f47ce26 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/src/core/CL/kernels/CLWeightsReshapeKernel.h
index c74255bac0..402a60472b 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
index a7a3463f59..d6697ba46b 100644
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
index a379b5f0b8..2af89e12eb 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
+++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
@@ -25,8 +25,8 @@
#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
index 1c8fef2db3..7ecdd30224 100644
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
index 6b0e8ee21d..0caf87114d 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
+++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
@@ -25,8 +25,8 @@
#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index a9a601dc8e..30d0a481bd 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h
index 16cf167b25..09c3f4455d 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h
@@ -25,8 +25,8 @@
#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
index e2f9ca5726..bd45ddb65f 100644
--- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -83,7 +83,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const unsigned int num_elems_processed_per_iteration_y = input->dimension(1);
const unsigned int num_elems_read_per_iteration_z = input->data_layout() == DataLayout::NCHW ? 1 : input->dimension(2);
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z));
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z));
Window win_collapsed = win.collapse(win, Window::DimZ);
return std::make_pair(Status{}, win_collapsed);
}
diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/src/core/CL/kernels/CLWinogradFilterTransformKernel.h
index b689be820f..d22fedebcd 100644
--- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
+++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
index 15c239e849..6f695c93db 100644
--- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/src/core/CL/kernels/CLWinogradInputTransformKernel.h
index 4f198f034a..25301877e6 100644
--- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
index 89a5176756..2018559f60 100644
--- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/src/core/CL/kernels/CLWinogradOutputTransformKernel.h
index f7cbd05020..632a5629d9 100644
--- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp
index 0c7588d740..e12d1e7a65 100644
--- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp
+++ b/src/core/CL/kernels/CLYOLOLayerKernel.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
+#include "src/core/CL/kernels/CLYOLOLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h b/src/core/CL/kernels/CLYOLOLayerKernel.h
index 52b069868e..5b1d56e9e5 100644
--- a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h
+++ b/src/core/CL/kernels/CLYOLOLayerKernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H
#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
index d182e386b8..4c92ae417f 100644
--- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
+++ b/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index cd732553be..98013b9e49 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "src/core/CL/CLKernels.h"
#include "support/Cast.h"
using namespace arm_compute::utils::cast;
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index 8c1fedd93f..830f54ce3f 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -28,6 +28,17 @@
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "support/Cast.h"
using namespace arm_compute::utils::cast;
diff --git a/src/runtime/CL/CLOperator.cpp b/src/runtime/CL/CLOperator.cpp
index 57a4d0ec57..075a544077 100644
--- a/src/runtime/CL/CLOperator.cpp
+++ b/src/runtime/CL/CLOperator.cpp
@@ -24,6 +24,8 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
+#include "src/core/CL/ICLKernel.h"
+
namespace arm_compute
{
namespace experimental
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index ccef5cbd1b..6fc7baed63 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -24,10 +24,10 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLTuner.h"
#include "arm_compute/runtime/CL/tuners/Tuners.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index adfe67fb11..ed85e606cf 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -24,9 +24,9 @@
#include "arm_compute/runtime/CL/CLTuner.h"
#include "arm_compute/runtime/CL/tuners/CLLWSList.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/ICLKernel.h"
#include "support/StringSupport.h"
#include <cerrno>
diff --git a/src/runtime/CL/ICLSimpleFunction.cpp b/src/runtime/CL/ICLSimpleFunction.cpp
index b00ad5e74f..b075aa17e3 100644
--- a/src/runtime/CL/ICLSimpleFunction.cpp
+++ b/src/runtime/CL/ICLSimpleFunction.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,19 +26,24 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
ICLSimpleFunction::ICLSimpleFunction(CLRuntimeContext *ctx) // NOLINT
: _kernel(),
- _border_handler(),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
_ctx(ctx)
{
}
+ICLSimpleFunction::~ICLSimpleFunction() = default;
+
void ICLSimpleFunction::run()
{
ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the CL kernel or function isn't configured");
- schedule_kernel_on_ctx(_ctx, &_border_handler, false);
+ schedule_kernel_on_ctx(_ctx, _border_handler.get(), false);
schedule_kernel_on_ctx(_ctx, _kernel.get());
}
diff --git a/src/runtime/CL/functions/CLAbsoluteDifference.cpp b/src/runtime/CL/functions/CLAbsoluteDifference.cpp
index d5d1bbdd7a..b7f40a516c 100644
--- a/src/runtime/CL/functions/CLAbsoluteDifference.cpp
+++ b/src/runtime/CL/functions/CLAbsoluteDifference.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
-#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
+#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLAccumulate.cpp b/src/runtime/CL/functions/CLAccumulate.cpp
index 2f06252446..742de64e34 100644
--- a/src/runtime/CL/functions/CLAccumulate.cpp
+++ b/src/runtime/CL/functions/CLAccumulate.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
-#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
+#include "src/core/CL/kernels/CLAccumulateKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp
index 5ddf227382..61c82b33eb 100644
--- a/src/runtime/CL/functions/CLActivationLayer.cpp
+++ b/src/runtime/CL/functions/CLActivationLayer.cpp
@@ -24,9 +24,9 @@
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLRuntimeContext.h"
+#include "src/core/CL/kernels/CLActivationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
index 57c4f685f6..5fc849e3c5 100644
--- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
+++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
@@ -30,8 +30,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/Utils.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -40,6 +42,8 @@ CLArgMinMaxLayer::CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manage
{
}
+CLArgMinMaxLayer::~CLArgMinMaxLayer() = default;
+
Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -124,13 +128,19 @@ void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const
auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
// Configure reduction operation kernels
- _reduction_kernels_vector.resize(_num_of_stages);
+ _reduction_kernels_vector.reserve(_num_of_stages);
+
+ auto add_reduction_kernel = [this, &compile_context, axis, op](const ICLTensor * input, const ICLTensor * prev_output, ICLTensor * output)
+ {
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLArgMinMaxLayerKernel>());
+ _reduction_kernels_vector.back()->configure(compile_context, input, prev_output, output, axis, op);
+ };
_memory_group.manage(&_not_reshaped_output);
// Create temporary tensors
if(_num_of_stages == 1)
{
- _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_not_reshaped_output, axis, op);
+ add_reduction_kernel(input, nullptr, &_not_reshaped_output);
}
else
{
@@ -144,19 +154,19 @@ void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const
// Apply ReductionOperation only on first kernel
_memory_group.manage(&_results_vector[0]);
- _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_results_vector[0], axis, op);
+ add_reduction_kernel(input, nullptr, &_results_vector[0]);
// Apply ReductionOperation on intermediate stages
for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
{
_memory_group.manage(&_results_vector[i]);
- _reduction_kernels_vector[i].configure(compile_context, input, &_results_vector[i - 1], &_results_vector[i], axis, op);
+ add_reduction_kernel(input, &_results_vector[i - 1], &_results_vector[i]);
_results_vector[i - 1].allocator()->allocate();
}
// Apply ReductionOperation on the last stage
const unsigned int last_stage = _num_of_stages - 1;
- _reduction_kernels_vector[last_stage].configure(compile_context, input, &_results_vector[last_stage - 1], &_not_reshaped_output, axis, op);
+ add_reduction_kernel(input, &_results_vector[last_stage - 1], &_not_reshaped_output);
_results_vector[last_stage - 1].allocator()->allocate();
}
_reshape.configure(compile_context, &_not_reshaped_output, output);
@@ -169,7 +179,7 @@ void CLArgMinMaxLayer::run()
for(unsigned int i = 0; i < _num_of_stages; ++i)
{
- CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
+ CLScheduler::get().enqueue(*_reduction_kernels_vector[i], false);
}
_reshape.run();
}
diff --git a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
index 701add074e..77eed1140f 100644
--- a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
@@ -29,14 +29,19 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+namespace arm_compute
+{
CLBatchNormalizationLayer::CLBatchNormalizationLayer()
- : _norm_kernel()
+ : _norm_kernel(support::cpp14::make_unique<CLBatchNormalizationLayerKernel>())
{
}
+CLBatchNormalizationLayer::~CLBatchNormalizationLayer() = default;
+
void CLBatchNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon,
ActivationLayerInfo act_info)
{
@@ -47,7 +52,7 @@ void CLBatchNormalizationLayer::configure(const CLCompileContext &compile_contex
const ICLTensor *gamma, float epsilon,
ActivationLayerInfo act_info)
{
- _norm_kernel.configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info);
+ _norm_kernel->configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info);
}
Status CLBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
@@ -60,5 +65,6 @@ Status CLBatchNormalizationLayer::validate(const ITensorInfo *input, const ITens
void CLBatchNormalizationLayer::run()
{
- CLScheduler::get().enqueue(_norm_kernel, true);
+ CLScheduler::get().enqueue(*_norm_kernel, true);
}
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
index 5ba3b5bc9c..e0a2c430ed 100644
--- a/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
+++ b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
@@ -30,13 +30,18 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-using namespace arm_compute;
+#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+#include "support/MemorySupport.h"
+namespace arm_compute
+{
CLBatchToSpaceLayer::CLBatchToSpaceLayer()
- : _batch_to_space_kernel()
+ : _batch_to_space_kernel(support::cpp14::make_unique<CLBatchToSpaceLayerKernel>())
{
}
+CLBatchToSpaceLayer::~CLBatchToSpaceLayer() = default;
+
void CLBatchToSpaceLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output);
@@ -44,7 +49,7 @@ void CLBatchToSpaceLayer::configure(const ICLTensor *input, const ICLTensor *blo
void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
{
- _batch_to_space_kernel.configure(compile_context, input, block_shape, output);
+ _batch_to_space_kernel->configure(compile_context, input, block_shape, output);
}
void CLBatchToSpaceLayer::configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
@@ -54,7 +59,7 @@ void CLBatchToSpaceLayer::configure(const ICLTensor *input, int32_t block_shape_
void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
{
- _batch_to_space_kernel.configure(compile_context, input, block_shape_x, block_shape_y, output);
+ _batch_to_space_kernel->configure(compile_context, input, block_shape_x, block_shape_y, output);
}
Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
@@ -69,5 +74,6 @@ Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, int32_t block_sha
void CLBatchToSpaceLayer::run()
{
- CLScheduler::get().enqueue(_batch_to_space_kernel, true);
+ CLScheduler::get().enqueue(*_batch_to_space_kernel, true);
}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLBitwiseAnd.cpp b/src/runtime/CL/functions/CLBitwiseAnd.cpp
index cb49e61e84..cfcd63f170 100644
--- a/src/runtime/CL/functions/CLBitwiseAnd.cpp
+++ b/src/runtime/CL/functions/CLBitwiseAnd.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "src/core/CL/kernels/CLBitwiseAndKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLBitwiseNot.cpp b/src/runtime/CL/functions/CLBitwiseNot.cpp
index 22c575ca8d..588c793f6a 100644
--- a/src/runtime/CL/functions/CLBitwiseNot.cpp
+++ b/src/runtime/CL/functions/CLBitwiseNot.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "src/core/CL/kernels/CLBitwiseNotKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLBitwiseOr.cpp b/src/runtime/CL/functions/CLBitwiseOr.cpp
index 4bbb8909fe..3a5de193a3 100644
--- a/src/runtime/CL/functions/CLBitwiseOr.cpp
+++ b/src/runtime/CL/functions/CLBitwiseOr.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "src/core/CL/kernels/CLBitwiseOrKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLBitwiseXor.cpp b/src/runtime/CL/functions/CLBitwiseXor.cpp
index bc37f6eaab..62aeaaa31f 100644
--- a/src/runtime/CL/functions/CLBitwiseXor.cpp
+++ b/src/runtime/CL/functions/CLBitwiseXor.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "src/core/CL/kernels/CLBitwiseXorKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLBoundingBoxTransform.cpp b/src/runtime/CL/functions/CLBoundingBoxTransform.cpp
index 2384fc4132..600d36290c 100644
--- a/src/runtime/CL/functions/CLBoundingBoxTransform.cpp
+++ b/src/runtime/CL/functions/CLBoundingBoxTransform.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h"
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLBox3x3.cpp b/src/runtime/CL/functions/CLBox3x3.cpp
index 0300899b59..be40f25055 100644
--- a/src/runtime/CL/functions/CLBox3x3.cpp
+++ b/src/runtime/CL/functions/CLBox3x3.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
-#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLBox3x3Kernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -41,5 +42,5 @@ void CLBox3x3::configure(const CLCompileContext &compile_context, ICLTensor *inp
auto k = arm_compute::support::cpp14::make_unique<CLBox3x3Kernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp
index cd2d6b478a..5a32564d2d 100644
--- a/src/runtime/CL/functions/CLCannyEdge.cpp
+++ b/src/runtime/CL/functions/CLCannyEdge.cpp
@@ -31,6 +31,10 @@
#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
+#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
@@ -38,10 +42,10 @@ using namespace arm_compute;
CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_sobel(),
- _gradient(),
- _border_mag_gradient(),
- _non_max_suppr(),
- _edge_trace(),
+ _gradient(support::cpp14::make_unique<CLGradientKernel>()),
+ _border_mag_gradient(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _non_max_suppr(support::cpp14::make_unique<CLEdgeNonMaxSuppressionKernel>()),
+ _edge_trace(support::cpp14::make_unique<CLEdgeTraceKernel>()),
_gx(),
_gy(),
_mag(),
@@ -55,6 +59,8 @@ CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLI
{
}
+CLCannyEdge::~CLCannyEdge() = default;
+
void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
uint8_t constant_border_value)
{
@@ -143,7 +149,7 @@ void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor *
_memory_group.manage(&_phase);
// Configure gradient
- _gradient.configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type);
+ _gradient->configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type);
// Allocate intermediate buffers
_gx.allocator()->allocate();
@@ -153,14 +159,14 @@ void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor *
_memory_group.manage(&_nonmax);
// Configure non-maxima suppression
- _non_max_suppr.configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
+ _non_max_suppr->configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
// Allocate intermediate buffers
_phase.allocator()->allocate();
// Fill border around magnitude image as non-maxima suppression will access
// it. If border mode is undefined filling the border is a nop.
- _border_mag_gradient.configure(compile_context, &_mag, _non_max_suppr.border_size(), border_mode, constant_border_value);
+ _border_mag_gradient->configure(compile_context, &_mag, _non_max_suppr->border_size(), border_mode, constant_border_value);
// Allocate intermediate buffers
_mag.allocator()->allocate();
@@ -172,7 +178,7 @@ void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor *
_memory_group.manage(&_l1_list_counter);
// Configure edge tracing
- _edge_trace.configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
+ _edge_trace->configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
// Allocate intermediate buffers
_visited.allocator()->allocate();
@@ -190,14 +196,14 @@ void CLCannyEdge::run()
_sobel->run();
// Run phase and magnitude calculation
- CLScheduler::get().enqueue(_gradient, false);
+ CLScheduler::get().enqueue(*_gradient, false);
// Fill border before non-maxima suppression. Nop for border mode undefined.
- CLScheduler::get().enqueue(_border_mag_gradient, false);
+ CLScheduler::get().enqueue(*_border_mag_gradient, false);
// Run non max suppresion
_nonmax.clear(CLScheduler::get().queue());
- CLScheduler::get().enqueue(_non_max_suppr, false);
+ CLScheduler::get().enqueue(*_non_max_suppr, false);
// Clear temporary structures and run edge trace
_output->clear(CLScheduler::get().queue());
@@ -205,5 +211,5 @@ void CLCannyEdge::run()
_recorded.clear(CLScheduler::get().queue());
_l1_list_counter.clear(CLScheduler::get().queue());
_l1_stack.clear(CLScheduler::get().queue());
- CLScheduler::get().enqueue(_edge_trace, true);
+ CLScheduler::get().enqueue(*_edge_trace, true);
}
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
index 95cc0e9239..2a28e06845 100644
--- a/src/runtime/CL/functions/CLCast.cpp
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLCast.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLChannelCombine.cpp b/src/runtime/CL/functions/CLChannelCombine.cpp
index 326caa8c74..e93aea31f4 100644
--- a/src/runtime/CL/functions/CLChannelCombine.cpp
+++ b/src/runtime/CL/functions/CLChannelCombine.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
-#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
+#include "src/core/CL/kernels/CLChannelCombineKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLChannelExtract.cpp b/src/runtime/CL/functions/CLChannelExtract.cpp
index aa37af9988..8b4a3f7458 100644
--- a/src/runtime/CL/functions/CLChannelExtract.cpp
+++ b/src/runtime/CL/functions/CLChannelExtract.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
-#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
+#include "src/core/CL/kernels/CLChannelExtractKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLChannelShuffleLayer.cpp b/src/runtime/CL/functions/CLChannelShuffleLayer.cpp
index b79afdb3b4..c443df3b37 100644
--- a/src/runtime/CL/functions/CLChannelShuffleLayer.cpp
+++ b/src/runtime/CL/functions/CLChannelShuffleLayer.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h"
-#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLColorConvert.cpp b/src/runtime/CL/functions/CLColorConvert.cpp
index 2bbb30e24c..95f4257929 100644
--- a/src/runtime/CL/functions/CLColorConvert.cpp
+++ b/src/runtime/CL/functions/CLColorConvert.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
-#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
+#include "src/core/CL/kernels/CLColorConvertKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLComparison.cpp b/src/runtime/CL/functions/CLComparison.cpp
index 8c18b35583..9b5840aa95 100644
--- a/src/runtime/CL/functions/CLComparison.cpp
+++ b/src/runtime/CL/functions/CLComparison.cpp
@@ -24,8 +24,9 @@
#include "arm_compute/runtime/CL/functions/CLComparison.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLComparisonKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
@@ -47,7 +48,7 @@ void CLComparison::configure(const CLCompileContext &compile_context, ICLTensor
if(broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
@@ -76,7 +77,7 @@ void CLComparisonStatic<COP>::configure(const CLCompileContext &compile_context,
if(broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
diff --git a/src/runtime/CL/functions/CLComputeAllAnchors.cpp b/src/runtime/CL/functions/CLComputeAllAnchors.cpp
index be86fc4f78..2cae0ee455 100644
--- a/src/runtime/CL/functions/CLComputeAllAnchors.cpp
+++ b/src/runtime/CL/functions/CLComputeAllAnchors.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLComputeAllAnchors.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp
index 2eb310b893..54f71f9765 100644
--- a/src/runtime/CL/functions/CLConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp
@@ -23,19 +23,19 @@
*/
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
-#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
index b291ae5b88..8ecc114343 100644
--- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
+++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
@@ -22,6 +22,8 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
+#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/CL/functions/CLConvolution.cpp b/src/runtime/CL/functions/CLConvolution.cpp
index bc962d0052..1ad32d309c 100644
--- a/src/runtime/CL/functions/CLConvolution.cpp
+++ b/src/runtime/CL/functions/CLConvolution.cpp
@@ -24,7 +24,6 @@
#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
@@ -32,6 +31,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -49,16 +50,21 @@ void CLConvolution3x3::configure(const CLCompileContext &compile_context, ICLTen
auto k = arm_compute::support::cpp14::make_unique<CLConvolution3x3Kernel>();
k->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
template <unsigned int matrix_size>
CLConvolutionSquare<matrix_size>::CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
+ : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(support::cpp14::make_unique<CLSeparableConvolutionHorKernel<matrix_size>>()),
+ _kernel_vert(support::cpp14::make_unique<CLSeparableConvolutionVertKernel<matrix_size>>()), _kernel(support::cpp14::make_unique<CLConvolutionKernel<matrix_size>>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
template <unsigned int matrix_size>
+CLConvolutionSquare<matrix_size>::~CLConvolutionSquare() = default;
+
+template <unsigned int matrix_size>
void CLConvolutionSquare<matrix_size>::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
uint8_t constant_border_value)
{
@@ -88,35 +94,35 @@ void CLConvolutionSquare<matrix_size>::configure(const CLCompileContext &compile
scale = calculate_matrix_scale(conv, matrix_size);
}
- _kernel_hor.configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
- _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel_hor->configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
+ _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
// Allocate intermediate buffer
_tmp.allocator()->allocate();
}
else
{
- _kernel.configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(compile_context, input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
}
template <unsigned int matrix_size>
void CLConvolutionSquare<matrix_size>::run()
{
- CLScheduler::get().enqueue(_border_handler);
+ CLScheduler::get().enqueue(*_border_handler);
if(_is_separable)
{
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_kernel_hor, false);
- CLScheduler::get().enqueue(_kernel_vert);
+ CLScheduler::get().enqueue(*_kernel_hor, false);
+ CLScheduler::get().enqueue(*_kernel_vert);
}
else
{
- CLScheduler::get().enqueue(_kernel);
+ CLScheduler::get().enqueue(*_kernel);
}
}
@@ -135,5 +141,5 @@ void CLConvolutionRectangle::configure(const CLCompileContext &compile_context,
auto k = arm_compute::support::cpp14::make_unique<CLConvolutionRectangleKernel>();
k->configure(compile_context, input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 85355f0f17..e214bdf0f2 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -29,7 +29,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
#include "support/MemorySupport.h"
#include <cmath>
@@ -45,6 +44,8 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
{
}
+CLConvolutionLayer::~CLConvolutionLayer() = default;
+
void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
{
diff --git a/src/runtime/CL/functions/CLCopy.cpp b/src/runtime/CL/functions/CLCopy.cpp
index acdc52d4f7..f7b016a779 100644
--- a/src/runtime/CL/functions/CLCopy.cpp
+++ b/src/runtime/CL/functions/CLCopy.cpp
@@ -24,11 +24,11 @@
#include "arm_compute/runtime/CL/functions/CLCopy.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLCropResize.cpp b/src/runtime/CL/functions/CLCropResize.cpp
index 4cf9f13a67..4aaa674c5c 100644
--- a/src/runtime/CL/functions/CLCropResize.cpp
+++ b/src/runtime/CL/functions/CLCropResize.cpp
@@ -25,6 +25,10 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLCropKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -61,6 +65,8 @@ CLCropResize::CLCropResize()
{
}
+CLCropResize::~CLCropResize() = default;
+
Status CLCropResize::validate(const ITensorInfo *input, ITensorInfo *boxes, ITensorInfo *box_ind, const ITensorInfo *output,
Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
{
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index e6717b6d01..6fe231ea6c 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -28,7 +28,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
#include "support/MemorySupport.h"
#include <cmath>
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
index eb1fb7fbdf..0cf2ea623f 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
@@ -27,16 +27,21 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLDeconvolutionLayerUpsample::CLDeconvolutionLayerUpsample() // NOLINT
- : _upsample(),
- _memset(),
+ : _upsample(support::cpp14::make_unique<CLDeconvolutionLayerUpsampleKernel>()),
+ _memset(support::cpp14::make_unique<CLMemsetKernel>()),
_output(nullptr)
{
}
+CLDeconvolutionLayerUpsample::~CLDeconvolutionLayerUpsample() = default;
+
Status CLDeconvolutionLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info)
{
return CLDeconvolutionLayerUpsampleKernel::validate(input, output, info);
@@ -52,13 +57,13 @@ void CLDeconvolutionLayerUpsample::configure(const CLCompileContext &compile_con
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
_output = output;
- _memset.configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info()));
- _upsample.configure(compile_context, input, _output, info);
+ _memset->configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info()));
+ _upsample->configure(compile_context, input, _output, info);
}
void CLDeconvolutionLayerUpsample::run()
{
- CLScheduler::get().enqueue(_memset, false);
- CLScheduler::get().enqueue(_upsample, true);
+ CLScheduler::get().enqueue(*_memset, false);
+ CLScheduler::get().enqueue(*_upsample, true);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 141eb3fefc..e58c0e5f4c 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
index 8571056104..8dbd974ceb 100644
--- a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h"
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index bb0db2e7a7..2440384e3b 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -24,13 +24,19 @@
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
@@ -119,7 +125,7 @@ Status validate_arguments_3x3(const ITensorInfo *input, const ITensorInfo *weigh
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _dwc_native_kernel(),
+ _dwc_native_kernel(support::cpp14::make_unique<CLDepthwiseConvolutionLayerNativeKernel>()),
_permute_input_to_nhwc(),
_permute_weights_to_nhwc(),
_permute_output_to_nchw(),
@@ -137,6 +143,8 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConv
{
}
+CLDepthwiseConvolutionLayer::~CLDepthwiseConvolutionLayer() = default;
+
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
@@ -206,9 +214,9 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(
dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;
DWCKernelInfo dwc_info;
dwc_info.activation_info = act_info;
- _dwc_native_kernel.configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
- dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,
- output_multipliers_to_use, output_shifts_to_use);
+ _dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
+ dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,
+ output_multipliers_to_use, output_shifts_to_use);
if(_needs_permute)
{
@@ -302,7 +310,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::run()
{
_permute_input_to_nhwc.run();
}
- CLScheduler::get().enqueue(_dwc_native_kernel);
+ CLScheduler::get().enqueue(*_dwc_native_kernel);
if(_needs_permute)
{
_permute_output_to_nchw.run();
@@ -343,11 +351,11 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare()
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
_kernel(nullptr),
- _border_handler(),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
_permute_input_to_nchw(),
_permute_weights_to_nchw(),
_permute_output_to_nhwc(),
- _reshape_weights(),
+ _reshape_weights(support::cpp14::make_unique<CLDepthwiseConvolutionLayerReshapeWeightsKernel>()),
_permuted_input(),
_permuted_weights(),
_permuted_output(),
@@ -378,14 +386,14 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayerInternal3x3::validate(input->info(),
- weights->info(),
- biases != nullptr ? biases->info() : nullptr,
- output->info(),
- conv_info,
- depth_multiplier,
- act_info,
- gpu_target,
- dilation));
+ weights->info(),
+ biases != nullptr ? biases->info() : nullptr,
+ output->info(),
+ conv_info,
+ depth_multiplier,
+ act_info,
+ gpu_target,
+ dilation));
const bool is_nhwc = input->info()->data_layout() == DataLayout::NHWC;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
@@ -434,7 +442,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
{
if(_needs_weights_reshape)
{
- _reshape_weights.configure(compile_context, weights, &_permuted_weights, info);
+ _reshape_weights->configure(compile_context, weights, &_permuted_weights, info);
weights_to_use = &_permuted_weights;
}
_kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>();
@@ -486,7 +494,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
{
zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
}
- _border_handler.configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
+ _border_handler->configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
}
Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
@@ -505,7 +513,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::run()
{
_permute_input_to_nchw.run();
}
- CLScheduler::get().enqueue(_border_handler);
+ CLScheduler::get().enqueue(*_border_handler);
CLScheduler::get().enqueue(*_kernel);
if(_needs_permute)
@@ -547,7 +555,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::prepar
ARM_COMPUTE_ERROR_ON(_needs_permute);
ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
_permuted_weights.allocator()->allocate();
- CLScheduler::get().enqueue(_reshape_weights);
+ CLScheduler::get().enqueue(*_reshape_weights);
_original_weights->mark_as_unused();
}
_is_prepared = true;
@@ -567,7 +575,7 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
+ unsigned int depth_multiplier,
ActivationLayerInfo act_info, const Size2D &dilation)
{
const GPUTarget gpu_target = CLScheduler::get().target();
diff --git a/src/runtime/CL/functions/CLDequantizationLayer.cpp b/src/runtime/CL/functions/CLDequantizationLayer.cpp
index 66ac58ef95..6d63463906 100644
--- a/src/runtime/CL/functions/CLDequantizationLayer.cpp
+++ b/src/runtime/CL/functions/CLDequantizationLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDerivative.cpp b/src/runtime/CL/functions/CLDerivative.cpp
index 7138281f87..a2b883ad28 100644
--- a/src/runtime/CL/functions/CLDerivative.cpp
+++ b/src/runtime/CL/functions/CLDerivative.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLDerivativeKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -41,5 +42,5 @@ void CLDerivative::configure(const CLCompileContext &compile_context, ICLTensor
auto k = arm_compute::support::cpp14::make_unique<CLDerivativeKernel>();
k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLDilate.cpp b/src/runtime/CL/functions/CLDilate.cpp
index 27acf9f7cc..c3d5f8845f 100644
--- a/src/runtime/CL/functions/CLDilate.cpp
+++ b/src/runtime/CL/functions/CLDilate.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLDilate.h"
-#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLDilateKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -41,5 +42,5 @@ void CLDilate::configure(const CLCompileContext &compile_context, ICLTensor *inp
auto k = arm_compute::support::cpp14::make_unique<CLDilateKernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index 07e7a18941..bff882c28b 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -24,19 +24,24 @@
#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLDirectConvolutionLayer::CLDirectConvolutionLayer()
- : _direct_conv_kernel(), _input_border_handler(), _activationlayer_function(), _is_activationlayer_enabled(false)
+ : _direct_conv_kernel(support::cpp14::make_unique<CLDirectConvolutionLayerKernel>()), _input_border_handler(support::cpp14::make_unique<CLFillBorderKernel>()), _activationlayer_function(),
+ _is_activationlayer_enabled(false)
{
}
+CLDirectConvolutionLayer::~CLDirectConvolutionLayer() = default;
+
void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info);
@@ -47,10 +52,10 @@ void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context
const ActivationLayerInfo &act_info)
{
// Set GPU target
- _direct_conv_kernel.set_target(CLScheduler::get().target());
+ _direct_conv_kernel->set_target(CLScheduler::get().target());
// Configure direct convolution
- _direct_conv_kernel.configure(compile_context, input, weights, biases, output, conv_info);
+ _direct_conv_kernel->configure(compile_context, input, weights, biases, output, conv_info);
// Configure border handler
PixelValue &&zero_value(0.f);
@@ -58,10 +63,10 @@ void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context
{
zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
}
- _input_border_handler.configure(compile_context, input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value);
+ _input_border_handler->configure(compile_context, input, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value);
// Tune kernels
- CLScheduler::get().tune_kernel_static(_direct_conv_kernel);
+ CLScheduler::get().tune_kernel_static(*_direct_conv_kernel);
_is_activationlayer_enabled = act_info.enabled();
@@ -86,10 +91,10 @@ Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso
void CLDirectConvolutionLayer::run()
{
// Run border handler
- CLScheduler::get().enqueue(_input_border_handler, false);
+ CLScheduler::get().enqueue(*_input_border_handler, false);
// Run direct convolution
- CLScheduler::get().enqueue(_direct_conv_kernel);
+ CLScheduler::get().enqueue(*_direct_conv_kernel);
//Run Activation Layer
if(_is_activationlayer_enabled)
diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
index 0ffafa0221..0e3109439e 100644
--- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
@@ -23,11 +23,17 @@
*/
#include "arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include <memory>
diff --git a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp
index de94255b48..35ed97d381 100644
--- a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp
+++ b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
-#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
+#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp
index 7b4d3c629d..736cf973a1 100644
--- a/src/runtime/CL/functions/CLElementwiseOperations.cpp
+++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp
@@ -24,8 +24,8 @@
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLEqualizeHistogram.cpp b/src/runtime/CL/functions/CLEqualizeHistogram.cpp
index a1158a71a5..cc927a055b 100644
--- a/src/runtime/CL/functions/CLEqualizeHistogram.cpp
+++ b/src/runtime/CL/functions/CLEqualizeHistogram.cpp
@@ -28,6 +28,9 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
+#include "support/MemorySupport.h"
#include <algorithm>
#include <cmath>
@@ -83,10 +86,17 @@ void calculate_cum_dist_and_lut(CLDistribution1D &dist, CLDistribution1D &cum_di
} // namespace
CLEqualizeHistogram::CLEqualizeHistogram()
- : _histogram_kernel(), _border_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8)
+ : _histogram_kernel(support::cpp14::make_unique<CLHistogramKernel>()),
+ _border_histogram_kernel(support::cpp14::make_unique<CLHistogramBorderKernel>()),
+ _map_histogram_kernel(support::cpp14::make_unique<CLTableLookupKernel>()),
+ _hist(nr_bins, 0, max_range),
+ _cum_dist(nr_bins, 0, max_range),
+ _cd_lut(nr_bins, DataType::U8)
{
}
+CLEqualizeHistogram::~CLEqualizeHistogram() = default;
+
void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -94,22 +104,22 @@ void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output)
void CLEqualizeHistogram::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output)
{
- _histogram_kernel.configure(compile_context, input, &_hist);
- _border_histogram_kernel.configure(compile_context, input, &_hist);
- _map_histogram_kernel.configure(compile_context, input, &_cd_lut, output);
+ _histogram_kernel->configure(compile_context, input, &_hist);
+ _border_histogram_kernel->configure(compile_context, input, &_hist);
+ _map_histogram_kernel->configure(compile_context, input, &_cd_lut, output);
}
void CLEqualizeHistogram::run()
{
// Calculate histogram of input.
- CLScheduler::get().enqueue(_histogram_kernel, false);
+ CLScheduler::get().enqueue(*_histogram_kernel, false);
// Calculate remaining pixels when image is not multiple of the elements of histogram kernel
- CLScheduler::get().enqueue(_border_histogram_kernel, false);
+ CLScheduler::get().enqueue(*_border_histogram_kernel, false);
// Calculate cumulative distribution of histogram and create LUT.
calculate_cum_dist_and_lut(_hist, _cum_dist, _cd_lut);
// Map input to output using created LUT.
- CLScheduler::get().enqueue(_map_histogram_kernel);
+ CLScheduler::get().enqueue(*_map_histogram_kernel);
}
diff --git a/src/runtime/CL/functions/CLErode.cpp b/src/runtime/CL/functions/CLErode.cpp
index 5236f620f1..6880c4845a 100644
--- a/src/runtime/CL/functions/CLErode.cpp
+++ b/src/runtime/CL/functions/CLErode.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLErode.h"
-#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLErodeKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -41,5 +42,5 @@ void CLErode::configure(const CLCompileContext &compile_context, ICLTensor *inpu
auto k = arm_compute::support::cpp14::make_unique<CLErodeKernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLFFT1D.cpp b/src/runtime/CL/functions/CLFFT1D.cpp
index 1269cba90d..a0078689ff 100644
--- a/src/runtime/CL/functions/CLFFT1D.cpp
+++ b/src/runtime/CL/functions/CLFFT1D.cpp
@@ -26,15 +26,28 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
#include "src/core/utils/helpers/fft.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLFFT1D::CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _run_scale(false)
+ : _memory_group(std::move(memory_manager)),
+ _digit_reverse_kernel(support::cpp14::make_unique<CLFFTDigitReverseKernel>()),
+ _fft_kernels(),
+ _scale_kernel(support::cpp14::make_unique<CLFFTScaleKernel>()),
+ _digit_reversed_input(),
+ _digit_reverse_indices(),
+ _num_ffts(0),
+ _run_scale(false)
{
}
+CLFFT1D::~CLFFT1D() = default;
+
void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
@@ -62,12 +75,12 @@ void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor
TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32);
_digit_reverse_indices.allocator()->init(digit_reverse_indices_info);
_memory_group.manage(&_digit_reversed_input);
- _digit_reverse_kernel.configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
+ _digit_reverse_kernel->configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
// Create and configure FFT kernels
unsigned int Nx = 1;
_num_ffts = decomposed_vector.size();
- _fft_kernels.resize(_num_ffts);
+ _fft_kernels.reserve(_num_ffts);
for(unsigned int i = 0; i < _num_ffts; ++i)
{
const unsigned int radix_for_stage = decomposed_vector.at(i);
@@ -77,7 +90,8 @@ void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor
fft_kernel_info.radix = radix_for_stage;
fft_kernel_info.Nx = Nx;
fft_kernel_info.is_first_stage = (i == 0);
- _fft_kernels[i].configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
+ _fft_kernels.emplace_back(support::cpp14::make_unique<CLFFTRadixStageKernel>());
+ _fft_kernels.back()->configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
Nx *= radix_for_stage;
}
@@ -88,7 +102,7 @@ void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor
FFTScaleKernelInfo scale_config;
scale_config.scale = static_cast<float>(N);
scale_config.conjugate = config.direction == FFTDirection::Inverse;
- is_c2r ? _scale_kernel.configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config);
+ is_c2r ? _scale_kernel->configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config);
}
// Allocate tensors
@@ -132,18 +146,18 @@ void CLFFT1D::run()
MemoryGroupResourceScope scope_mg(_memory_group);
// Run digit reverse
- CLScheduler::get().enqueue(_digit_reverse_kernel, false);
+ CLScheduler::get().enqueue(*_digit_reverse_kernel, false);
// Run radix kernels
for(unsigned int i = 0; i < _num_ffts; ++i)
{
- CLScheduler::get().enqueue(_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale);
+ CLScheduler::get().enqueue(*_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale);
}
// Run output scaling
if(_run_scale)
{
- CLScheduler::get().enqueue(_scale_kernel, true);
+ CLScheduler::get().enqueue(*_scale_kernel, true);
}
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLFFT2D.cpp b/src/runtime/CL/functions/CLFFT2D.cpp
index 7ab852fa98..1d444bb15d 100644
--- a/src/runtime/CL/functions/CLFFT2D.cpp
+++ b/src/runtime/CL/functions/CLFFT2D.cpp
@@ -26,6 +26,9 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
namespace arm_compute
{
@@ -34,6 +37,8 @@ CLFFT2D::CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager)
{
}
+CLFFT2D::~CLFFT2D() = default;
+
void CLFFT2D::configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
diff --git a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
index 4d0eab81ee..5472e8469f 100644
--- a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
@@ -29,6 +29,13 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/fft.h"
diff --git a/src/runtime/CL/functions/CLFastCorners.cpp b/src/runtime/CL/functions/CLFastCorners.cpp
index 97f853fdea..110d2c3639 100644
--- a/src/runtime/CL/functions/CLFastCorners.cpp
+++ b/src/runtime/CL/functions/CLFastCorners.cpp
@@ -24,12 +24,14 @@
#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFastCornersKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "support/MemorySupport.h"
#include <algorithm>
#include <cstring>
@@ -38,9 +40,9 @@ using namespace arm_compute;
CLFastCorners::CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _fast_corners_kernel(),
+ _fast_corners_kernel(support::cpp14::make_unique<CLFastCornersKernel>()),
_suppr_func(),
- _copy_array_kernel(),
+ _copy_array_kernel(support::cpp14::make_unique<CLCopyToArrayKernel>()),
_output(),
_suppr(),
_win(),
@@ -52,6 +54,8 @@ CLFastCorners::CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
{
}
+CLFastCorners::~CLFastCorners() = default;
+
void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners,
unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value)
{
@@ -78,11 +82,11 @@ void CLFastCorners::configure(const CLCompileContext &compile_context, const ICL
const bool update_number = (nullptr != _num_corners);
_memory_group.manage(&_output);
- _fast_corners_kernel.configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode);
+ _fast_corners_kernel->configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode);
if(!_non_max)
{
- _copy_array_kernel.configure(compile_context, &_output, update_number, _corners, &_num_buffer);
+ _copy_array_kernel->configure(compile_context, &_output, update_number, _corners, &_num_buffer);
}
else
{
@@ -90,7 +94,7 @@ void CLFastCorners::configure(const CLCompileContext &compile_context, const ICL
_memory_group.manage(&_suppr);
_suppr_func.configure(compile_context, &_output, &_suppr, border_mode);
- _copy_array_kernel.configure(compile_context, &_suppr, update_number, _corners, &_num_buffer);
+ _copy_array_kernel->configure(compile_context, &_suppr, update_number, _corners, &_num_buffer);
_suppr.allocator()->allocate();
}
@@ -113,14 +117,14 @@ void CLFastCorners::run()
q.enqueueUnmapMemObject(_output.cl_buffer(), out_buffer);
}
- CLScheduler::get().enqueue(_fast_corners_kernel, false);
+ CLScheduler::get().enqueue(*_fast_corners_kernel, false);
if(_non_max)
{
_suppr_func.run();
}
- CLScheduler::get().enqueue(_copy_array_kernel, false);
+ CLScheduler::get().enqueue(*_copy_array_kernel, false);
unsigned int get_num_corners = 0;
q.enqueueReadBuffer(_num_buffer, CL_TRUE, 0, sizeof(unsigned int), &get_num_corners);
diff --git a/src/runtime/CL/functions/CLFill.cpp b/src/runtime/CL/functions/CLFill.cpp
index a89383ec31..855ed8380a 100644
--- a/src/runtime/CL/functions/CLFill.cpp
+++ b/src/runtime/CL/functions/CLFill.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLFill.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/CL/functions/CLFillBorder.cpp b/src/runtime/CL/functions/CLFillBorder.cpp
index c647bb6a02..27d132b842 100644
--- a/src/runtime/CL/functions/CLFillBorder.cpp
+++ b/src/runtime/CL/functions/CLFillBorder.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLFillBorder.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLFlattenLayer.cpp b/src/runtime/CL/functions/CLFlattenLayer.cpp
index a826541017..0646a0d3a0 100644
--- a/src/runtime/CL/functions/CLFlattenLayer.cpp
+++ b/src/runtime/CL/functions/CLFlattenLayer.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
diff --git a/src/runtime/CL/functions/CLFloor.cpp b/src/runtime/CL/functions/CLFloor.cpp
index 7ed92ac3df..770e6a3781 100644
--- a/src/runtime/CL/functions/CLFloor.cpp
+++ b/src/runtime/CL/functions/CLFloor.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLFloor.h"
-#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
+#include "src/core/CL/kernels/CLFloorKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 75e87c382b..1796443ca5 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -28,6 +28,19 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "support/Cast.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/CL/functions/CLFuseBatchNormalization.cpp b/src/runtime/CL/functions/CLFuseBatchNormalization.cpp
index 825267c0fc..f018e5a8ae 100644
--- a/src/runtime/CL/functions/CLFuseBatchNormalization.cpp
+++ b/src/runtime/CL/functions/CLFuseBatchNormalization.cpp
@@ -28,14 +28,18 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLFuseBatchNormalization::CLFuseBatchNormalization()
- : _fuse_bn_kernel()
+ : _fuse_bn_kernel(support::cpp14::make_unique<CLFuseBatchNormalizationKernel>())
{
}
+CLFuseBatchNormalization::~CLFuseBatchNormalization() = default;
+
void CLFuseBatchNormalization::configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var,
ICLTensor *fused_weights, ICLTensor *fused_bias,
const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
@@ -49,7 +53,7 @@ void CLFuseBatchNormalization::configure(const CLCompileContext &compile_context
const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
float epsilon, FuseBatchNormalizationType fbn_type)
{
- _fuse_bn_kernel.configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+ _fuse_bn_kernel->configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
}
Status CLFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
@@ -62,6 +66,6 @@ Status CLFuseBatchNormalization::validate(const ITensorInfo *input_weights, cons
void CLFuseBatchNormalization::run()
{
- CLScheduler::get().enqueue(_fuse_bn_kernel, true);
+ CLScheduler::get().enqueue(*_fuse_bn_kernel, true);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index 80c5496ede..0151485849 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GPUTarget.h"
@@ -38,6 +39,11 @@
#include "src/core/CL/ICLGEMMKernelConfiguration.h"
#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h"
#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/float_ops.h"
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
@@ -51,16 +57,58 @@ using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::cl_gemm;
using namespace arm_compute::utils::cast;
+namespace weights_transformations
+{
+CLGEMMReshapeRHSMatrixKernelManaged::CLGEMMReshapeRHSMatrixKernelManaged()
+ : _kernel(support::cpp14::make_unique<CLGEMMReshapeRHSMatrixKernel>())
+{
+}
+
+CLGEMMReshapeRHSMatrixKernelManaged::~CLGEMMReshapeRHSMatrixKernelManaged() = default;
+
+void CLGEMMReshapeRHSMatrixKernelManaged::run()
+{
+ _output.allocator()->allocate();
+ CLScheduler::get().enqueue(*_kernel, false);
+ _reshape_run = true;
+}
+
+void CLGEMMReshapeRHSMatrixKernelManaged::release()
+{
+ _output.allocator()->free();
+}
+
+ICLTensor *CLGEMMReshapeRHSMatrixKernelManaged::get_weights()
+{
+ return &_output;
+}
+
+uint32_t CLGEMMReshapeRHSMatrixKernelManaged::uid()
+{
+ return _uid;
+}
+
+void CLGEMMReshapeRHSMatrixKernelManaged::configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, info);
+}
+
+void CLGEMMReshapeRHSMatrixKernelManaged::configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
+{
+ _kernel->configure(compile_context, input, &_output, info);
+}
+} // namespace weights_transformations
+
CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(std::move(memory_manager)),
_weights_manager(weights_manager),
- _mm_kernel(),
- _reshape_lhs_kernel(),
- _reshape_rhs_kernel(),
- _reshape_rhs_kernel_managed(),
- _mm_reshaped_kernel(),
- _mm_reshaped_only_rhs_kernel(),
- _mm_reshaped_only_rhs_fallback_kernel(),
+ _mm_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyKernel>()),
+ _reshape_lhs_kernel(support::cpp14::make_unique<CLGEMMReshapeLHSMatrixKernel>()),
+ _reshape_rhs_kernel(support::cpp14::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
+ _reshape_rhs_kernel_managed(support::cpp14::make_unique<weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged>()),
+ _mm_reshaped_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyReshapedKernel>()),
+ _mm_reshaped_only_rhs_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>()),
+ _mm_reshaped_only_rhs_fallback_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>()),
_tmp_a(),
_tmp_b(),
_original_b(nullptr),
@@ -73,6 +121,8 @@ CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *
{
}
+CLGEMM::~CLGEMM() = default;
+
CLGEMMKernelType CLGEMM::select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool reshape_b_only_on_first_run)
{
std::unique_ptr<ICLGEMMKernelSelection> gemm_kernel = CLGEMMKernelSelectionFactory::create(CLScheduler::get().target());
@@ -98,15 +148,15 @@ void CLGEMM::configure_native_v1(const CLCompileContext &compile_context, const
const GPUTarget gpu_target = CLScheduler::get().target();
// Set the target for the kernels
- _mm_kernel.set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias());
// Configure and tune matrix multiply kernel
- _mm_kernel.configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
+ _mm_kernel->configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
// Tune kernel statically
- CLScheduler::get().tune_kernel_static(_mm_kernel);
+ CLScheduler::get().tune_kernel_static(*_mm_kernel);
}
void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
@@ -122,8 +172,8 @@ void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, cons
int mult_interleave4x4_height = 1;
// Set the target for the kernels
- _reshape_lhs_kernel.set_target(gpu_target);
- _mm_kernel.set_target(gpu_target);
+ _reshape_lhs_kernel->set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)
{
@@ -158,24 +208,24 @@ void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, cons
}
// Configure interleave kernel
- _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
+ _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
// Configure transpose kernel
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
{
- _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
- reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
+ _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info);
+ reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get()));
}
else
{
- _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
+ _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
}
// Configure and tune matrix multiply kernel
- _mm_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
+ _mm_kernel->configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
- CLScheduler::get().tune_kernel_static(_mm_kernel);
+ CLScheduler::get().tune_kernel_static(*_mm_kernel);
// Allocate intermediate tensors
_tmp_a.allocator()->allocate();
@@ -209,8 +259,8 @@ void CLGEMM::configure_reshaped_v2(const CLCompileContext &compile_context, cons
kernel_info.activation_info = gemm_info.activation_info();
// Set the target for the kernels
- _reshape_lhs_kernel.set_target(gpu_target);
- _mm_kernel.set_target(gpu_target);
+ _reshape_lhs_kernel->set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
const bool use_mm_b = (!_weights_manager || !_weights_manager->are_weights_managed(b));
@@ -234,21 +284,21 @@ void CLGEMM::configure_reshaped_v2(const CLCompileContext &compile_context, cons
// Configure lhs_info and rhs_info
std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);
- _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
+ _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
{
- _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
- reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
+ _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info);
+ reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get()));
}
else
{
- _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
+ _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
}
// Configure and tune matrix multiply kernel
- _mm_reshaped_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_kernel->configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
// Allocate intermediate tensors
_tmp_a.allocator()->allocate();
@@ -282,7 +332,7 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context
kernel_info.activation_info = gemm_info.activation_info();
// Set the target for the kernels
- _mm_kernel.set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
const bool use_mm_b = (!_weights_manager || !_weights_manager->are_weights_managed(b));
@@ -305,12 +355,12 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
{
- _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
- reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
+ _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info);
+ reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get()));
}
else
{
- _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
+ _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
}
// Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true)
@@ -319,11 +369,11 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context
// Configure matrix multiply kernel with no y padding support
kernel_info.has_pad_y = false;
- _mm_reshaped_only_rhs_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
// Configure matrix multiply kernel with y padding support
kernel_info.has_pad_y = true;
- _mm_reshaped_only_rhs_fallback_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
if(!_reshape_b_only_on_first_run && use_mm_b)
{
@@ -626,49 +676,49 @@ void CLGEMM::run()
{
case CLGEMMKernelType::NATIVE_V1:
{
- CLScheduler::get().enqueue(_mm_kernel, true);
+ CLScheduler::get().enqueue(*_mm_kernel, true);
break;
}
case CLGEMMKernelType::RESHAPED_V1:
{
// Run interleave kernel
- CLScheduler::get().enqueue(_reshape_lhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_lhs_kernel, false);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
}
}
- CLScheduler::get().enqueue(_mm_kernel, true);
+ CLScheduler::get().enqueue(*_mm_kernel, true);
break;
}
case CLGEMMKernelType::RESHAPED:
{
// Run interleave kernel
- CLScheduler::get().enqueue(_reshape_lhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_lhs_kernel, false);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
}
}
- CLScheduler::get().enqueue(_mm_reshaped_kernel, true);
+ CLScheduler::get().enqueue(*_mm_reshaped_kernel, true);
break;
}
case CLGEMMKernelType::RESHAPED_ONLY_RHS:
@@ -678,20 +728,20 @@ void CLGEMM::run()
// Run transpose kernel
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
}
}
if(_has_pad_y)
{
- CLScheduler::get().enqueue(_mm_reshaped_only_rhs_fallback_kernel, true);
+ CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_fallback_kernel, true);
}
else
{
- CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, true);
+ CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, true);
}
break;
}
@@ -720,13 +770,13 @@ void CLGEMM::prepare()
{
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
// Run transpose kernel and mark original weights tensor as unused
_tmp_b.allocator()->allocate();
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
_original_b->mark_as_unused();
}
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index e871b39805..4d26df5e43 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -30,8 +30,23 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "support/Cast.h"
+#include "support/MemorySupport.h"
#include <cmath>
#include <memory>
@@ -43,10 +58,12 @@ using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::cast;
CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
- : _weights_reshape_kernel()
+ : _weights_reshape_kernel(support::cpp14::make_unique<CLWeightsReshapeKernel>())
{
}
+CLConvolutionLayerReshapeWeights::~CLConvolutionLayerReshapeWeights() = default;
+
void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups);
@@ -64,7 +81,7 @@ void CLConvolutionLayerReshapeWeights::configure(const CLCompileContext &compile
const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;
- _weights_reshape_kernel.configure(compile_context, weights, biases_to_use, output, num_groups);
+ _weights_reshape_kernel->configure(compile_context, weights, biases_to_use, output, num_groups);
output->info()->set_quantization_info(weights->info()->quantization_info());
}
@@ -96,16 +113,18 @@ Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co
void CLConvolutionLayerReshapeWeights::run()
{
- CLScheduler::get().enqueue(_weights_reshape_kernel);
+ CLScheduler::get().enqueue(*_weights_reshape_kernel);
}
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager, weights_manager),
- _mm_gemmlowp(memory_manager), _col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false),
- _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(support::cpp14::make_unique<CLIm2ColKernel>()),
+ _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(support::cpp14::make_unique<CLCol2ImKernel>()), _activationlayer_function(), _original_weights(nullptr),
+ _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}
+CLGEMMConvolutionLayer::~CLGEMMConvolutionLayer() = default;
+
void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
int gemm_3d_depth, const ActivationLayerInfo &act_info)
@@ -230,8 +249,8 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context,
_fuse_activation = true;
// Set the GPU target for im2col and col2im
- _im2col_kernel.set_target(CLScheduler::get().target());
- _col2im_kernel.set_target(CLScheduler::get().target());
+ _im2col_kernel->set_target(CLScheduler::get().target());
+ _col2im_kernel->set_target(CLScheduler::get().target());
const ICLTensor *gemm_input_to_use = input;
ICLTensor *gemm_output_to_use = output;
@@ -293,11 +312,11 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context,
_memory_group.manage(&_im2col_output);
// Configure and tune im2col. im2col output shape is auto-initialized
- _im2col_kernel.configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);
+ _im2col_kernel->configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);
// Set quantization info
_im2col_output.info()->set_quantization_info(input->info()->quantization_info());
- CLScheduler::get().tune_kernel_static(_im2col_kernel);
+ CLScheduler::get().tune_kernel_static(*_im2col_kernel);
// Update GEMM input
gemm_input_to_use = &_im2col_output;
@@ -390,8 +409,8 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context,
if(!_skip_col2im)
{
// Configure and tune Col2Im
- _col2im_kernel.configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
- CLScheduler::get().tune_kernel_static(_col2im_kernel);
+ _col2im_kernel->configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
+ CLScheduler::get().tune_kernel_static(*_col2im_kernel.get());
}
if(!_skip_col2im)
@@ -611,7 +630,7 @@ void CLGEMMConvolutionLayer::run()
// Run im2col
if(!_skip_im2col)
{
- CLScheduler::get().enqueue(_im2col_kernel);
+ CLScheduler::get().enqueue(*_im2col_kernel);
}
// Runs CLGEMM or CLGEMMLowpMatrixMultiplyCore functions
@@ -629,7 +648,7 @@ void CLGEMMConvolutionLayer::run()
// Reshape output matrix
if(!_skip_col2im)
{
- CLScheduler::get().enqueue(_col2im_kernel, false);
+ CLScheduler::get().enqueue(*_col2im_kernel.get(), false);
}
//Run Activation Layer if we cannot fuse in GEMM
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index 5fc9c17bef..4d277f0982 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -28,8 +28,23 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
-#include <memory>
#include <tuple>
namespace arm_compute
@@ -99,7 +114,7 @@ CLGEMMDeconvolutionLayer::CLGEMMDeconvolutionLayer(std::shared_ptr<IMemoryManage
_permute_weights_to_nhwc(),
_reshape_weights(),
_transpose_weights(),
- _deconv_reshape(),
+ _deconv_reshape(support::cpp14::make_unique<CLDeconvolutionReshapeOutputKernel>()),
_slice_gemm(),
_gemmlowp_final(),
_reshaped_weights(),
@@ -116,6 +131,8 @@ CLGEMMDeconvolutionLayer::CLGEMMDeconvolutionLayer(std::shared_ptr<IMemoryManage
{
}
+CLGEMMDeconvolutionLayer::~CLGEMMDeconvolutionLayer() = default;
+
Status CLGEMMDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &deconv_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
@@ -317,7 +334,7 @@ void CLGEMMDeconvolutionLayer::configure(const CLCompileContext &compile_context
}
// Configure a Col2Im call to reshape the output of GEMM
- _deconv_reshape.configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), deconv_info);
+ _deconv_reshape->configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), deconv_info);
_gemm_output.allocator()->allocate();
if(_is_quantized)
@@ -357,7 +374,7 @@ void CLGEMMDeconvolutionLayer::run()
_mm_gemm.run();
}
- CLScheduler::get().enqueue(_deconv_reshape, false);
+ CLScheduler::get().enqueue(*_deconv_reshape, false);
if(_is_quantized)
{
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 7a8de6c1f5..d3d80a39e3 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -35,8 +35,16 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h"
#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -71,14 +79,14 @@ inline bool is_gemm_reshaped(unsigned int m, unsigned int n, unsigned int k, Dat
CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _weights_to_qasymm8(),
- _mm_native_kernel(),
- _mm_reshaped_only_rhs_kernel(),
- _mtx_b_reshape_kernel(),
- _mtx_a_reduction_kernel(),
- _mtx_b_reduction_kernel(),
- _offset_contribution_kernel(),
- _offset_contribution_output_stage_kernel(),
+ _weights_to_qasymm8(support::cpp14::make_unique<CLDepthConvertLayerKernel>()),
+ _mm_native_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
+ _mm_reshaped_only_rhs_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
+ _mtx_b_reshape_kernel(support::cpp14::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
+ _mtx_a_reduction_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _mtx_b_reduction_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixBReductionKernel>()),
+ _offset_contribution_kernel(support::cpp14::make_unique<CLGEMMLowpOffsetContributionKernel>()),
+ _offset_contribution_output_stage_kernel(support::cpp14::make_unique<CLGEMMLowpOffsetContributionOutputStageKernel>()),
_qasymm8_weights(),
_vector_sum_col(),
_vector_sum_row(),
@@ -100,6 +108,8 @@ CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemo
{
}
+CLGEMMLowpMatrixMultiplyCore::~CLGEMMLowpMatrixMultiplyCore() = default;
+
void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
configure(CLKernelLibrary::get().get_compile_context(), a, b, c, output, gemm_info);
@@ -125,8 +135,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
const GPUTarget gpu_target = CLScheduler::get().target();
// Set the target for the kernels
- _mm_native_kernel.set_target(gpu_target);
- _mm_reshaped_only_rhs_kernel.set_target(gpu_target);
+ _mm_native_kernel->set_target(gpu_target);
+ _mm_reshaped_only_rhs_kernel->set_target(gpu_target);
GEMMRHSMatrixInfo rhs_info;
GEMMLHSMatrixInfo lhs_info;
@@ -150,7 +160,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
TensorInfo weights_info(*b->info());
weights_info.set_data_type(DataType::QASYMM8);
_qasymm8_weights.allocator()->init(weights_info);
- _weights_to_qasymm8.configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
+ _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
}
const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
@@ -168,7 +178,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
// Configure reshape RHS kernel
- _mtx_b_reshape_kernel.configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
+ _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
}
// Using default reduction info
@@ -185,7 +195,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
}
// Configure Matrix B reduction kernel
- _mtx_b_reduction_kernel.configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
+ _mtx_b_reduction_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
}
// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
@@ -196,7 +206,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
_memory_group.manage(&_vector_sum_row);
// Configure matrix A reduction kernel
- _mtx_a_reduction_kernel.configure(compile_context, a, &_vector_sum_row, reduction_info);
+ _mtx_a_reduction_kernel->configure(compile_context, a, &_vector_sum_row, reduction_info);
}
GEMMKernelInfo gemm_kernel_info;
@@ -226,8 +236,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
// Configure and tune matrix multiply kernel with fused output stage
- _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
+ _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
}
else
{
@@ -237,7 +247,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
if(_is_gemm_reshaped)
{
- _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
}
else
{
@@ -245,11 +255,11 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
// Configure matrix multiply kernel
- _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+ _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
- _offset_contribution_output_stage_kernel.configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
- a->info()->dimension(0),
- _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
+ _offset_contribution_output_stage_kernel->configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
+ a->info()->dimension(0),
+ _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
_mm_result_s32.allocator()->allocate();
}
}
@@ -270,7 +280,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
if(_is_gemm_reshaped)
{
// Configure and tune matrix multiply kernel
- _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
}
else
{
@@ -278,12 +288,12 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
// Configure matrix multiply kernel
- _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+ _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
}
// Configure offset contribution kernel
- _offset_contribution_kernel.configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
- _b_offset);
+ _offset_contribution_kernel->configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
+ _b_offset);
}
// Allocate tensors
@@ -489,40 +499,40 @@ void CLGEMMLowpMatrixMultiplyCore::run()
if(!_reshape_b_only_on_first_run)
{
// Run reshape matrix B
- CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
}
}
// Run matrix B reduction kernel only if _a_offset is not equal to 0
if(_a_offset != 0 && !_reshape_b_only_on_first_run)
{
- CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
}
// Run matrix A reduction kernel only if _b_offset is not equal to 0
if(_b_offset != 0)
{
- CLScheduler::get().enqueue(_mtx_a_reduction_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_a_reduction_kernel, false);
}
// Run matrix multiply
if(_is_gemm_reshaped)
{
- CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, false);
}
else
{
- CLScheduler::get().enqueue(_mm_native_kernel, false);
+ CLScheduler::get().enqueue(*_mm_native_kernel, false);
}
if(_run_output_stage)
{
// Run offset contribution/output stage kernel
- CLScheduler::get().enqueue(_offset_contribution_output_stage_kernel, true);
+ CLScheduler::get().enqueue(*_offset_contribution_output_stage_kernel, true);
}
if(_run_offset_contribution)
{
// Run offset contribution kernel
- CLScheduler::get().enqueue(_offset_contribution_kernel, true);
+ CLScheduler::get().enqueue(*_offset_contribution_kernel, true);
}
}
@@ -533,7 +543,7 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
if(_convert_to_qasymm8)
{
_qasymm8_weights.allocator()->allocate();
- CLScheduler::get().enqueue(_weights_to_qasymm8, false);
+ CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
}
if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
@@ -542,7 +552,7 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
// Run reshape kernel and mark original weights tensor as unused
_tmp_b.allocator()->allocate();
- CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
_original_b->mark_as_unused();
}
@@ -550,7 +560,7 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
if(_a_offset != 0 && _reshape_b_only_on_first_run)
{
_vector_sum_col.allocator()->allocate();
- CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
}
CLScheduler::get().queue().finish();
diff --git a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp
index 28f397fd8b..f9c5247d2d 100644
--- a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp
@@ -24,11 +24,14 @@
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
#include "support/MemorySupport.h"
+#include <algorithm>
+
namespace arm_compute
{
void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
diff --git a/src/runtime/CL/functions/CLGather.cpp b/src/runtime/CL/functions/CLGather.cpp
index d9b6679ebf..de6296f6a3 100644
--- a/src/runtime/CL/functions/CLGather.cpp
+++ b/src/runtime/CL/functions/CLGather.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/CL/functions/CLGather.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLGaussian3x3.cpp b/src/runtime/CL/functions/CLGaussian3x3.cpp
index c62e200315..97db9ba06d 100644
--- a/src/runtime/CL/functions/CLGaussian3x3.cpp
+++ b/src/runtime/CL/functions/CLGaussian3x3.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
-#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -41,5 +42,5 @@ void CLGaussian3x3::configure(const CLCompileContext &compile_context, ICLTensor
auto k = arm_compute::support::cpp14::make_unique<CLGaussian3x3Kernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLGaussian5x5.cpp b/src/runtime/CL/functions/CLGaussian5x5.cpp
index 1fe2fddfb6..f7470d4ecf 100644
--- a/src/runtime/CL/functions/CLGaussian5x5.cpp
+++ b/src/runtime/CL/functions/CLGaussian5x5.cpp
@@ -24,22 +24,30 @@
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "support/MemorySupport.h"
#include <utility>
using namespace arm_compute;
CLGaussian5x5::CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _border_handler(), _tmp()
+ : _memory_group(std::move(memory_manager)),
+ _kernel_hor(support::cpp14::make_unique<CLGaussian5x5HorKernel>()),
+ _kernel_vert(support::cpp14::make_unique<CLGaussian5x5VertKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _tmp()
{
}
+CLGaussian5x5::~CLGaussian5x5() = default;
+
void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
@@ -55,9 +63,9 @@ void CLGaussian5x5::configure(const CLCompileContext &compile_context, ICLTensor
_memory_group.manage(&_tmp);
// Configure kernels
- _kernel_hor.configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel_hor->configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED);
+ _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
// Allocate intermediate buffers
_tmp.allocator()->allocate();
@@ -65,10 +73,10 @@ void CLGaussian5x5::configure(const CLCompileContext &compile_context, ICLTensor
void CLGaussian5x5::run()
{
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_kernel_hor, false);
- CLScheduler::get().enqueue(_kernel_vert);
+ CLScheduler::get().enqueue(*_kernel_hor, false);
+ CLScheduler::get().enqueue(*_kernel_vert);
}
diff --git a/src/runtime/CL/functions/CLGaussianPyramid.cpp b/src/runtime/CL/functions/CLGaussianPyramid.cpp
index 297d535ba5..66b85352c1 100644
--- a/src/runtime/CL/functions/CLGaussianPyramid.cpp
+++ b/src/runtime/CL/functions/CLGaussianPyramid.cpp
@@ -24,19 +24,21 @@
#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-
#include "arm_compute/runtime/CL/CLPyramid.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
+#include "support/MemorySupport.h"
#include <cstddef>
@@ -47,6 +49,8 @@ CLGaussianPyramid::CLGaussianPyramid()
{
}
+CLGaussianPyramid::~CLGaussianPyramid() = default;
+
CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT
: _horizontal_border_handler(),
_vertical_border_handler(),
@@ -55,6 +59,8 @@ CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT
{
}
+CLGaussianPyramidHalf::~CLGaussianPyramidHalf() = default;
+
void CLGaussianPyramidHalf::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, border_mode, constant_border_value);
@@ -80,10 +86,10 @@ void CLGaussianPyramidHalf::configure(const CLCompileContext &compile_context, I
if(num_levels > 1)
{
- _horizontal_border_handler.resize(num_levels - 1);
- _vertical_border_handler.resize(num_levels - 1);
- _horizontal_reduction.resize(num_levels - 1);
- _vertical_reduction.resize(num_levels - 1);
+ _horizontal_border_handler.reserve(num_levels - 1);
+ _vertical_border_handler.reserve(num_levels - 1);
+ _horizontal_reduction.reserve(num_levels - 1);
+ _vertical_reduction.reserve(num_levels - 1);
// Apply half scale to the X dimension of the tensor shape
TensorShape tensor_shape = pyramid->info()->tensor_shape();
@@ -95,16 +101,20 @@ void CLGaussianPyramidHalf::configure(const CLCompileContext &compile_context, I
for(size_t i = 0; i < num_levels - 1; ++i)
{
/* Configure horizontal kernel */
- _horizontal_reduction[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
+ _horizontal_reduction.emplace_back(support::cpp14::make_unique<CLGaussianPyramidHorKernel>());
+ _horizontal_reduction.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
/* Configure vertical kernel */
- _vertical_reduction[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
+ _vertical_reduction.emplace_back(support::cpp14::make_unique<CLGaussianPyramidVertKernel>());
+ _vertical_reduction.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
/* Configure border */
- _horizontal_border_handler[i].configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
+ _horizontal_border_handler.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _horizontal_border_handler.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction.back()->border_size(), border_mode, PixelValue(constant_border_value));
/* Configure border */
- _vertical_border_handler[i].configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16));
+ _vertical_border_handler.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _vertical_border_handler.back()->configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction.back()->border_size(), border_mode, PixelValue(pixel_value_u16));
}
_tmp.allocate();
}
@@ -127,10 +137,10 @@ void CLGaussianPyramidHalf::run()
for(unsigned int i = 0; i < num_levels - 1; ++i)
{
- CLScheduler::get().enqueue(_horizontal_border_handler[i], false);
- CLScheduler::get().enqueue(_horizontal_reduction[i], false);
- CLScheduler::get().enqueue(_vertical_border_handler[i], false);
- CLScheduler::get().enqueue(_vertical_reduction[i], false);
+ CLScheduler::get().enqueue(*_horizontal_border_handler[i], false);
+ CLScheduler::get().enqueue(*_horizontal_reduction[i], false);
+ CLScheduler::get().enqueue(*_vertical_border_handler[i], false);
+ CLScheduler::get().enqueue(*_vertical_reduction[i], false);
}
}
@@ -163,7 +173,7 @@ void CLGaussianPyramidOrb::configure(const CLCompileContext &compile_context, IC
if(num_levels > 1)
{
_gauss5x5.resize(num_levels - 1);
- _scale_nearest.resize(num_levels - 1);
+ _scale_nearest.reserve(num_levels - 1);
PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8);
@@ -175,7 +185,8 @@ void CLGaussianPyramidOrb::configure(const CLCompileContext &compile_context, IC
_gauss5x5[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value);
/* Configure scale image kernel */
- _scale_nearest[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, PixelValue(), SamplingPolicy::CENTER });
+ _scale_nearest.emplace_back(support::cpp14::make_unique<CLScaleKernel>());
+ _scale_nearest.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, PixelValue(), SamplingPolicy::CENTER });
}
_tmp.allocate();
@@ -199,6 +210,6 @@ void CLGaussianPyramidOrb::run()
for(unsigned int i = 0; i < num_levels - 1; ++i)
{
_gauss5x5[i].run();
- CLScheduler::get().enqueue(_scale_nearest[i]);
+ CLScheduler::get().enqueue(*_scale_nearest[i]);
}
}
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index 5291de074a..87bf39030a 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -25,22 +25,29 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager),
- _permute_deltas_kernel(),
+ _permute_deltas_kernel(support::cpp14::make_unique<CLPermuteKernel>()),
_flatten_deltas(),
- _permute_scores_kernel(),
+ _permute_scores_kernel(support::cpp14::make_unique<CLPermuteKernel>()),
_flatten_scores(),
- _compute_anchors_kernel(),
- _bounding_box_kernel(),
- _pad_kernel(),
- _dequantize_anchors(),
- _dequantize_deltas(),
- _quantize_all_proposals(),
+ _compute_anchors_kernel(support::cpp14::make_unique<CLComputeAllAnchorsKernel>()),
+ _bounding_box_kernel(support::cpp14::make_unique<CLBoundingBoxTransformKernel>()),
+ _pad_kernel(support::cpp14::make_unique<CLPadLayerKernel>()),
+ _dequantize_anchors(support::cpp14::make_unique<CLDequantizationLayerKernel>()),
+ _dequantize_deltas(support::cpp14::make_unique<CLDequantizationLayerKernel>()),
+ _quantize_all_proposals(support::cpp14::make_unique<CLQuantizationLayerKernel>()),
_cpp_nms(memory_manager),
_is_nhwc(false),
_is_qasymm8(false),
@@ -62,6 +69,8 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManage
{
}
+CLGenerateProposalsLayer::~CLGenerateProposalsLayer() = default;
+
void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals,
const GenerateProposalsInfo &info)
{
@@ -92,7 +101,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context
// Compute all the anchors
_memory_group.manage(&_all_anchors);
- _compute_anchors_kernel.configure(compile_context, anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
+ _compute_anchors_kernel->configure(compile_context, anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
_deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info()));
@@ -102,7 +111,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context
if(!_is_nhwc)
{
_memory_group.manage(&_deltas_permuted);
- _permute_deltas_kernel.configure(compile_context, deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_deltas_kernel->configure(compile_context, deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
_flatten_deltas.configure(compile_context, &_deltas_permuted, &_deltas_flattened);
_deltas_permuted.allocator()->allocate();
}
@@ -119,7 +128,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context
if(!_is_nhwc)
{
_memory_group.manage(&_scores_permuted);
- _permute_scores_kernel.configure(compile_context, scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_scores_kernel->configure(compile_context, scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
_flatten_scores.configure(compile_context, &_scores_permuted, &_scores_flattened);
_scores_permuted.allocator()->allocate();
}
@@ -137,18 +146,18 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context
_memory_group.manage(&_all_anchors_f32);
_memory_group.manage(&_deltas_flattened_f32);
// Dequantize anchors to float
- _dequantize_anchors.configure(compile_context, &_all_anchors, &_all_anchors_f32);
+ _dequantize_anchors->configure(compile_context, &_all_anchors, &_all_anchors_f32);
_all_anchors.allocator()->allocate();
anchors_to_use = &_all_anchors_f32;
// Dequantize deltas to float
- _dequantize_deltas.configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32);
+ _dequantize_deltas->configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32);
_deltas_flattened.allocator()->allocate();
deltas_to_use = &_deltas_flattened_f32;
}
// Bounding box transform
_memory_group.manage(&_all_proposals);
BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f);
- _bounding_box_kernel.configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
+ _bounding_box_kernel->configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
deltas_to_use->allocator()->allocate();
anchors_to_use->allocator()->allocate();
@@ -158,7 +167,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context
_memory_group.manage(&_all_proposals_quantized);
// Requantize all_proposals to QASYMM16 with 0.125 scale and 0 offset
_all_proposals_quantized.allocator()->init(TensorInfo(_all_proposals.info()->tensor_shape(), 1, DataType::QASYMM16, QuantizationInfo(0.125f, 0)));
- _quantize_all_proposals.configure(compile_context, &_all_proposals, &_all_proposals_quantized);
+ _quantize_all_proposals->configure(compile_context, &_all_proposals, &_all_proposals_quantized);
_all_proposals.allocator()->allocate();
_all_proposals_to_use = &_all_proposals_quantized;
}
@@ -193,7 +202,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context
_scores_flattened.allocator()->allocate();
// Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
- _pad_kernel.configure(compile_context, &_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
+ _pad_kernel->configure(compile_context, &_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
_proposals_4_roi_values.allocator()->allocate();
}
@@ -343,34 +352,34 @@ void CLGenerateProposalsLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
// Compute all the anchors
- CLScheduler::get().enqueue(_compute_anchors_kernel, false);
+ CLScheduler::get().enqueue(*_compute_anchors_kernel, false);
// Transpose and reshape the inputs
if(!_is_nhwc)
{
- CLScheduler::get().enqueue(_permute_deltas_kernel, false);
- CLScheduler::get().enqueue(_permute_scores_kernel, false);
+ CLScheduler::get().enqueue(*_permute_deltas_kernel, false);
+ CLScheduler::get().enqueue(*_permute_scores_kernel, false);
}
_flatten_deltas.run();
_flatten_scores.run();
if(_is_qasymm8)
{
- CLScheduler::get().enqueue(_dequantize_anchors, false);
- CLScheduler::get().enqueue(_dequantize_deltas, false);
+ CLScheduler::get().enqueue(*_dequantize_anchors, false);
+ CLScheduler::get().enqueue(*_dequantize_deltas, false);
}
// Build the boxes
- CLScheduler::get().enqueue(_bounding_box_kernel, false);
+ CLScheduler::get().enqueue(*_bounding_box_kernel, false);
if(_is_qasymm8)
{
- CLScheduler::get().enqueue(_quantize_all_proposals, false);
+ CLScheduler::get().enqueue(*_quantize_all_proposals, false);
}
// Non maxima suppression
run_cpp_nms_kernel();
// Add dummy batch indexes
- CLScheduler::get().enqueue(_pad_kernel, true);
+ CLScheduler::get().enqueue(*_pad_kernel, true);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLHOGDescriptor.cpp b/src/runtime/CL/functions/CLHOGDescriptor.cpp
index 21fa6690ea..80026532ab 100644
--- a/src/runtime/CL/functions/CLHOGDescriptor.cpp
+++ b/src/runtime/CL/functions/CLHOGDescriptor.cpp
@@ -28,14 +28,26 @@
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLHOGDescriptor::CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space()
+ : _memory_group(std::move(memory_manager)),
+ _gradient(),
+ _orient_bin(support::cpp14::make_unique<CLHOGOrientationBinningKernel>()),
+ _block_norm(support::cpp14::make_unique<CLHOGBlockNormalizationKernel>()),
+ _mag(),
+ _phase(),
+ _hog_space()
{
}
+CLHOGDescriptor::~CLHOGDescriptor() = default;
+
void CLHOGDescriptor::configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, hog, border_mode, constant_border_value);
@@ -87,10 +99,10 @@ void CLHOGDescriptor::configure(const CLCompileContext &compile_context, ICLTens
_memory_group.manage(&_hog_space);
// Initialise orientation binning kernel
- _orient_bin.configure(compile_context, &_mag, &_phase, &_hog_space, hog->info());
+ _orient_bin->configure(compile_context, &_mag, &_phase, &_hog_space, hog->info());
// Initialize HOG norm kernel
- _block_norm.configure(compile_context, &_hog_space, output, hog->info());
+ _block_norm->configure(compile_context, &_hog_space, output, hog->info());
// Allocate intermediate tensors
_mag.allocator()->allocate();
@@ -106,8 +118,8 @@ void CLHOGDescriptor::run()
_gradient.run();
// Run orientation binning
- CLScheduler::get().enqueue(_orient_bin, false);
+ CLScheduler::get().enqueue(*_orient_bin, false);
// Run block normalization
- CLScheduler::get().enqueue(_block_norm);
+ CLScheduler::get().enqueue(*_block_norm);
}
\ No newline at end of file
diff --git a/src/runtime/CL/functions/CLHOGDetector.cpp b/src/runtime/CL/functions/CLHOGDetector.cpp
index 9188f654dc..07ae8151c0 100644
--- a/src/runtime/CL/functions/CLHOGDetector.cpp
+++ b/src/runtime/CL/functions/CLHOGDetector.cpp
@@ -23,19 +23,22 @@
*/
#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "support/MemorySupport.h"
#include <algorithm>
using namespace arm_compute;
CLHOGDetector::CLHOGDetector()
- : _hog_detector_kernel(), _detection_windows(nullptr), _num_detection_windows()
+ : _hog_detector_kernel(support::cpp14::make_unique<CLHOGDetectorKernel>()), _detection_windows(nullptr), _num_detection_windows()
{
}
+CLHOGDetector::~CLHOGDetector() = default;
+
void CLHOGDetector::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
{
configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, detection_window_stride, threshold, idx_class);
@@ -50,7 +53,7 @@ void CLHOGDetector::configure(const CLCompileContext &compile_context, const ICL
_num_detection_windows = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int));
// Configure HOGDetectorKernel
- _hog_detector_kernel.configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
+ _hog_detector_kernel->configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
}
void CLHOGDetector::run()
@@ -62,7 +65,7 @@ void CLHOGDetector::run()
q.enqueueWriteBuffer(_num_detection_windows, CL_FALSE, 0, sizeof(unsigned int), &init_num_detection_windows);
// Run CLHOGDetectorKernel
- CLScheduler::get().enqueue(_hog_detector_kernel);
+ CLScheduler::get().enqueue(*_hog_detector_kernel);
// Read number of detections
unsigned int num_detection_windows = 0;
diff --git a/src/runtime/CL/functions/CLHOGGradient.cpp b/src/runtime/CL/functions/CLHOGGradient.cpp
index 934d1f6351..5f3b9cf529 100644
--- a/src/runtime/CL/functions/CLHOGGradient.cpp
+++ b/src/runtime/CL/functions/CLHOGGradient.cpp
@@ -26,11 +26,18 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLHOGGradient::CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _derivative(), _mag_phase(), _gx(), _gy()
+ : _memory_group(std::move(memory_manager)),
+ _derivative(),
+ _mag_phase(support::cpp14::make_unique<CLMagnitudePhaseKernel>()),
+ _gx(),
+ _gy()
{
}
@@ -63,11 +70,11 @@ void CLHOGGradient::configure(const CLCompileContext &compile_context, ICLTensor
// Initialise magnitude/phase kernel
if(PhaseType::UNSIGNED == phase_type)
{
- _mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);
+ _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);
}
else
{
- _mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED);
+ _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED);
}
// Allocate intermediate tensors
@@ -83,5 +90,5 @@ void CLHOGGradient::run()
_derivative.run();
// Run magnitude/phase kernel
- CLScheduler::get().enqueue(_mag_phase);
+ CLScheduler::get().enqueue(*_mag_phase);
}
\ No newline at end of file
diff --git a/src/runtime/CL/functions/CLHOGMultiDetection.cpp b/src/runtime/CL/functions/CLHOGMultiDetection.cpp
index 51db43cd71..dfc90537cf 100644
--- a/src/runtime/CL/functions/CLHOGMultiDetection.cpp
+++ b/src/runtime/CL/functions/CLHOGMultiDetection.cpp
@@ -30,6 +30,11 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/Scheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
@@ -52,6 +57,8 @@ CLHOGMultiDetection::CLHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_
{
}
+CLHOGMultiDetection::~CLHOGMultiDetection() = default;
+
void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance)
{
@@ -135,8 +142,8 @@ void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICL
_num_block_norm_kernel = input_block_norm.size(); // Number of CLHOGBlockNormalizationKernel kernels to compute
_num_hog_detect_kernel = input_hog_detect.size(); // Number of CLHOGDetector functions to compute
- _orient_bin_kernel.resize(_num_orient_bin_kernel);
- _block_norm_kernel.resize(_num_block_norm_kernel);
+ _orient_bin_kernel.reserve(_num_orient_bin_kernel);
+ _block_norm_kernel.reserve(_num_block_norm_kernel);
_hog_detect_kernel.resize(_num_hog_detect_kernel);
_hog_space.resize(_num_orient_bin_kernel);
_hog_norm_space.resize(_num_block_norm_kernel);
@@ -181,7 +188,8 @@ void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICL
_memory_group.manage(&_hog_space[i]);
// Initialise orientation binning kernel
- _orient_bin_kernel[i].configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info());
+ _orient_bin_kernel.emplace_back(support::cpp14::make_unique<CLHOGOrientationBinningKernel>());
+ _orient_bin_kernel.back()->configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info());
}
// Allocate intermediate tensors
@@ -202,7 +210,8 @@ void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICL
_memory_group.manage(&_hog_norm_space[i]);
// Initialize block normalization kernel
- _block_norm_kernel[i].configure(compile_context, &_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info());
+ _block_norm_kernel.emplace_back(support::cpp14::make_unique<CLHOGBlockNormalizationKernel>());
+ _block_norm_kernel.back()->configure(compile_context, &_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info());
}
// Allocate intermediate tensors
@@ -248,13 +257,13 @@ void CLHOGMultiDetection::run()
// Run orientation binning kernel
for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
{
- CLScheduler::get().enqueue(_orient_bin_kernel[i], false);
+ CLScheduler::get().enqueue(*_orient_bin_kernel[i], false);
}
// Run block normalization kernel
for(size_t i = 0; i < _num_block_norm_kernel; ++i)
{
- CLScheduler::get().enqueue(_block_norm_kernel[i], false);
+ CLScheduler::get().enqueue(*_block_norm_kernel[i], false);
}
// Run HOG detector kernel
diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp
index 45b93a5be0..9d8ebceb30 100644
--- a/src/runtime/CL/functions/CLHarrisCorners.cpp
+++ b/src/runtime/CL/functions/CLHarrisCorners.cpp
@@ -24,8 +24,6 @@
#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
@@ -35,6 +33,10 @@
#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
#include "arm_compute/runtime/ITensorAllocator.h"
#include "arm_compute/runtime/Scheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "support/MemorySupport.h"
#include <cmath>
@@ -45,12 +47,12 @@ using namespace arm_compute;
CLHarrisCorners::CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_sobel(nullptr),
- _harris_score(),
+ _harris_score(support::cpp14::make_unique<CLHarrisScoreKernel>()),
_non_max_suppr(),
_candidates(),
_sort_euclidean(),
- _border_gx(),
- _border_gy(),
+ _border_gx(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _border_gy(support::cpp14::make_unique<CLFillBorderKernel>()),
_gx(),
_gy(),
_score(),
@@ -61,6 +63,8 @@ CLHarrisCorners::CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager)
{
}
+CLHarrisCorners::~CLHarrisCorners() = default;
+
void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist,
float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
@@ -133,11 +137,11 @@ void CLHarrisCorners::configure(const CLCompileContext &compile_context, ICLImag
_memory_group.manage(&_score);
// Set/init Harris Score kernel accordingly with block_size
- _harris_score.configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+ _harris_score->configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
// Configure border filling using harris score kernel's block size
- _border_gx.configure(compile_context, &_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
- _border_gy.configure(compile_context, &_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_gx->configure(compile_context, &_gx, _harris_score->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_gy->configure(compile_context, &_gy, _harris_score->border_size(), border_mode, PixelValue(constant_border_value));
// Allocate intermediate buffers
_gx.allocator()->allocate();
@@ -175,11 +179,11 @@ void CLHarrisCorners::run()
_sobel->run();
// Fill border before harris score kernel
- CLScheduler::get().enqueue(_border_gx, false);
- CLScheduler::get().enqueue(_border_gy, false);
+ CLScheduler::get().enqueue(*_border_gx, false);
+ CLScheduler::get().enqueue(*_border_gy, false);
// Run harris score kernel
- CLScheduler::get().enqueue(_harris_score, false);
+ CLScheduler::get().enqueue(*_harris_score, false);
// Run non-maxima suppression
_non_max_suppr.run();
diff --git a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
index 4a60ee9d08..bd680f448d 100644
--- a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
@@ -23,9 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
-
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLIntegralImage.cpp b/src/runtime/CL/functions/CLIntegralImage.cpp
index 8561494242..41e47e77c7 100644
--- a/src/runtime/CL/functions/CLIntegralImage.cpp
+++ b/src/runtime/CL/functions/CLIntegralImage.cpp
@@ -23,16 +23,20 @@
*/
#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLIntegralImageKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLIntegralImage::CLIntegralImage()
- : _integral_hor(), _integral_vert()
+ : _integral_hor(support::cpp14::make_unique<CLIntegralImageHorKernel>()),
+ _integral_vert(support::cpp14::make_unique<CLIntegralImageVertKernel>())
{
}
+CLIntegralImage::~CLIntegralImage() = default;
+
void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -40,12 +44,12 @@ void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output)
void CLIntegralImage::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
- _integral_hor.configure(compile_context, input, output);
- _integral_vert.configure(compile_context, output);
+ _integral_hor->configure(compile_context, input, output);
+ _integral_vert->configure(compile_context, output);
}
void CLIntegralImage::run()
{
- CLScheduler::get().enqueue(_integral_hor, false);
- CLScheduler::get().enqueue(_integral_vert);
+ CLScheduler::get().enqueue(*_integral_hor, false);
+ CLScheduler::get().enqueue(*_integral_vert);
}
diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
index 66191d1799..64aac269cd 100644
--- a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
+++ b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
@@ -24,12 +24,15 @@
#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -39,10 +42,15 @@ constexpr int max_input_tensor_dim = 3;
} // namespace
CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
+ : _memory_group(std::move(memory_manager)),
+ _reduce_func(),
+ _normalize_kernel(support::cpp14::make_unique<CLL2NormalizeLayerKernel>()),
+ _sumsq()
{
}
+CLL2NormalizeLayer::~CLL2NormalizeLayer() = default;
+
void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, epsilon);
@@ -59,7 +67,7 @@ void CLL2NormalizeLayer::configure(const CLCompileContext &compile_context, ICLT
// Configure kernels
const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim);
_reduce_func.configure(compile_context, input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE);
- _normalize_kernel.configure(compile_context, input, &_sumsq, output, axis, epsilon);
+ _normalize_kernel->configure(compile_context, input, &_sumsq, output, axis, epsilon);
// Allocate intermediate tensor
_sumsq.allocator()->allocate();
@@ -91,6 +99,6 @@ void CLL2NormalizeLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
_reduce_func.run();
- CLScheduler::get().enqueue(_normalize_kernel, true);
+ CLScheduler::get().enqueue(*_normalize_kernel, true);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 058b6027c2..b095c06535 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -29,6 +29,22 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -37,20 +53,23 @@ using namespace arm_compute::utils::info_helpers;
CLLSTMLayer::CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
- _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
- _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(),
- _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), _fully_connected_output_state(), _projection_clip(),
- _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(),
- _ones_memset_kernel(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(),
- _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(),
- _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(),
- _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(),
- _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(),
- _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false),
- _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false)
+ _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(),
+ _transpose_cell_state(support::cpp14::make_unique<CLTransposeKernel>()), _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(),
+ _pixelwise_mul_cell_state2(), _fully_connected_output(), _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(),
+ _fully_connected_output_state(), _projection_clip(), _copy_cell_state(support::cpp14::make_unique<CLCopyKernel>()), _copy_output(support::cpp14::make_unique<CLCopyKernel>()), _concat_scratch_buffer(),
+ _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(), _ones_memset_kernel(support::cpp14::make_unique<CLMemsetKernel>()),
+ _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(),
+ _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(),
+ _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(),
+ _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(),
+ _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(),
+ _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false),
+ _is_layer_norm_lstm(false)
{
}
+CLLSTMLayer::~CLLSTMLayer() = default;
+
void CLLSTMLayer::configure(const ICLTensor *input,
const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
@@ -172,7 +191,7 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe
{
_memory_group.manage(&_input_gate_out1);
_ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
- _ones_memset_kernel.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
+ _ones_memset_kernel->configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
_subtract_input_gate.configure(compile_context, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
_ones.allocator()->allocate();
_run_cifg_opt = true;
@@ -241,7 +260,7 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe
_memory_group.manage(&_cell_state_out1);
_fully_connected_cell_state.configure(compile_context, input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
_memory_group.manage(&_cell_state_out2);
- _transpose_cell_state.configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2);
+ _transpose_cell_state->configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2);
_memory_group.manage(&_cell_state_out3);
_gemm_cell_state1.configure(compile_context, output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f);
_cell_state_out2.allocator()->allocate();
@@ -367,8 +386,8 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe
}
// Copy cell state and output
- _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out);
- _copy_output.configure(compile_context, output_state_out, output);
+ _copy_cell_state->configure(compile_context, &_cell_state_out1, cell_state_out);
+ _copy_output->configure(compile_context, output_state_out, output);
// Vector for holding the tensors to store in scratch buffer
std::vector<const ICLTensor *> scratch_inputs;
@@ -642,7 +661,7 @@ void CLLSTMLayer::run()
if(_run_cifg_opt)
{
- CLScheduler::get().enqueue(_ones_memset_kernel);
+ CLScheduler::get().enqueue(*_ones_memset_kernel);
_subtract_input_gate.run();
}
else
@@ -665,7 +684,7 @@ void CLLSTMLayer::run()
}
_fully_connected_cell_state.run();
- CLScheduler::get().enqueue(_transpose_cell_state);
+ CLScheduler::get().enqueue(*_transpose_cell_state);
_gemm_cell_state1.run();
_accum_cell_state1.run();
if(_is_layer_norm_lstm)
@@ -711,8 +730,8 @@ void CLLSTMLayer::run()
}
}
- CLScheduler::get().enqueue(_copy_cell_state);
- CLScheduler::get().enqueue(_copy_output);
+ CLScheduler::get().enqueue(*_copy_cell_state);
+ CLScheduler::get().enqueue(*_copy_output);
_concat_scratch_buffer.run();
}
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index 76a531b1c9..46062387e7 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -27,6 +27,14 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include <memory>
diff --git a/src/runtime/CL/functions/CLLaplacianPyramid.cpp b/src/runtime/CL/functions/CLLaplacianPyramid.cpp
index 81e903cde8..1ad19e56ea 100644
--- a/src/runtime/CL/functions/CLLaplacianPyramid.cpp
+++ b/src/runtime/CL/functions/CLLaplacianPyramid.cpp
@@ -32,6 +32,9 @@
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
using namespace arm_compute;
diff --git a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
index cbb952c3f6..d7fd81754b 100644
--- a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
+++ b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
@@ -23,11 +23,13 @@
*/
#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include <cstddef>
diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
index 74cb47347f..04e59ac4a6 100644
--- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
@@ -27,6 +27,11 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
#include <cmath>
#include <tuple>
@@ -78,8 +83,16 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons
} // namespace
CLLocallyConnectedLayer::CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
- _is_prepared(false), _original_weights(nullptr)
+ : _memory_group(std::move(memory_manager)),
+ _input_im2col_kernel(support::cpp14::make_unique<CLIm2ColKernel>()),
+ _weights_reshape_kernel(support::cpp14::make_unique<CLWeightsReshapeKernel>()),
+ _mm_kernel(support::cpp14::make_unique<CLLocallyConnectedMatrixMultiplyKernel>()),
+ _output_col2im_kernel(support::cpp14::make_unique<CLCol2ImKernel>()),
+ _input_im2col_reshaped(),
+ _weights_reshaped(),
+ _gemm_output(),
+ _is_prepared(false),
+ _original_weights(nullptr)
{
}
@@ -169,16 +182,16 @@ void CLLocallyConnectedLayer::configure(const CLCompileContext &compile_context,
_memory_group.manage(&_gemm_output);
// Configure kernels
- _input_im2col_kernel.configure(compile_context, input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
- _weights_reshape_kernel.configure(compile_context, weights, biases, &_weights_reshaped);
- _mm_kernel.configure(compile_context, &_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
- _output_col2im_kernel.configure(compile_context, &_gemm_output, output, Size2D(conv_w, conv_h));
+ _input_im2col_kernel->configure(compile_context, input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
+ _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped);
+ _mm_kernel->configure(compile_context, &_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
+ _output_col2im_kernel->configure(compile_context, &_gemm_output, output, Size2D(conv_w, conv_h));
// Allocate intermediate tensors
_input_im2col_reshaped.allocator()->allocate();
_gemm_output.allocator()->allocate();
- CLScheduler::get().tune_kernel_static(_input_im2col_kernel);
+ CLScheduler::get().tune_kernel_static(*_input_im2col_kernel);
}
void CLLocallyConnectedLayer::run()
@@ -188,13 +201,13 @@ void CLLocallyConnectedLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
// Run input reshaping
- CLScheduler::get().enqueue(_input_im2col_kernel);
+ CLScheduler::get().enqueue(*_input_im2col_kernel);
// Runs vector matrix multiply on reshaped matrices
- CLScheduler::get().enqueue(_mm_kernel);
+ CLScheduler::get().enqueue(*_mm_kernel);
// Reshape output matrix
- CLScheduler::get().enqueue(_output_col2im_kernel, false);
+ CLScheduler::get().enqueue(*_output_col2im_kernel.get(), false);
}
void CLLocallyConnectedLayer::prepare()
@@ -205,7 +218,7 @@ void CLLocallyConnectedLayer::prepare()
// Run weights reshaping and mark original weights tensor as unused
_weights_reshaped.allocator()->allocate();
- CLScheduler::get().enqueue(_weights_reshape_kernel);
+ CLScheduler::get().enqueue(*_weights_reshape_kernel);
_original_weights->mark_as_unused();
CLScheduler::get().queue().finish();
diff --git a/src/runtime/CL/functions/CLMagnitude.cpp b/src/runtime/CL/functions/CLMagnitude.cpp
index 962adadbb2..fb3ebdaa96 100644
--- a/src/runtime/CL/functions/CLMagnitude.cpp
+++ b/src/runtime/CL/functions/CLMagnitude.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp b/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp
index 3e32c55067..392bff2b4e 100644
--- a/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp
@@ -24,18 +24,23 @@
#include "arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLMaxUnpoolingLayer::CLMaxUnpoolingLayer()
- : _memset_kernel(), _unpooling_layer_kernel()
+ : _memset_kernel(support::cpp14::make_unique<CLMemsetKernel>()),
+ _unpooling_layer_kernel(support::cpp14::make_unique<CLMaxUnpoolingLayerKernel>())
{
}
+CLMaxUnpoolingLayer::~CLMaxUnpoolingLayer() = default;
+
void CLMaxUnpoolingLayer::configure(ICLTensor *input, ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, pool_info);
@@ -44,9 +49,9 @@ void CLMaxUnpoolingLayer::configure(ICLTensor *input, ICLTensor *indices, ICLTen
void CLMaxUnpoolingLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info)
{
const PixelValue zero_value(0.f);
- _memset_kernel.configure(output, zero_value);
+ _memset_kernel->configure(output, zero_value);
- _unpooling_layer_kernel.configure(compile_context, input, indices, output, pool_info);
+ _unpooling_layer_kernel->configure(compile_context, input, indices, output, pool_info);
}
Status CLMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
@@ -57,9 +62,9 @@ Status CLMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo
void CLMaxUnpoolingLayer::run()
{
// Run memset
- CLScheduler::get().enqueue(_memset_kernel, false);
+ CLScheduler::get().enqueue(*_memset_kernel, false);
// Run max unpooling layer
- CLScheduler::get().enqueue(_unpooling_layer_kernel);
+ CLScheduler::get().enqueue(*_unpooling_layer_kernel);
}
} /* namespace arm_compute */
diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp
index 2517fdc4ef..c91bc954b8 100644
--- a/src/runtime/CL/functions/CLMeanStdDev.cpp
+++ b/src/runtime/CL/functions/CLMeanStdDev.cpp
@@ -25,6 +25,10 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
@@ -39,13 +43,15 @@ CLMeanStdDev::CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager) // NO
_reduction_output_stddev(),
_mean(nullptr),
_stddev(nullptr),
- _mean_stddev_kernel(),
- _fill_border_kernel(),
+ _mean_stddev_kernel(support::cpp14::make_unique<CLMeanStdDevKernel>()),
+ _fill_border_kernel(support::cpp14::make_unique<CLFillBorderKernel>()),
_global_sum(),
_global_sum_squared()
{
}
+CLMeanStdDev::~CLMeanStdDev() = default;
+
Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev)
{
ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
@@ -101,8 +107,8 @@ void CLMeanStdDev::configure(const CLCompileContext &compile_context, ICLImage *
_global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
}
- _mean_stddev_kernel.configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared);
- _fill_border_kernel.configure(compile_context, input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ _mean_stddev_kernel->configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared);
+ _fill_border_kernel->configure(compile_context, input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
}
}
@@ -149,8 +155,8 @@ void CLMeanStdDev::run_float()
void CLMeanStdDev::run_int()
{
- CLScheduler::get().enqueue(_fill_border_kernel);
- CLScheduler::get().enqueue(_mean_stddev_kernel);
+ CLScheduler::get().enqueue(*_fill_border_kernel);
+ CLScheduler::get().enqueue(*_mean_stddev_kernel);
}
void CLMeanStdDev::run()
diff --git a/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp b/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp
index 07ab669fde..5b5ff49ecb 100644
--- a/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLMedian3x3.cpp b/src/runtime/CL/functions/CLMedian3x3.cpp
index 92153128f9..2040ebd4f5 100644
--- a/src/runtime/CL/functions/CLMedian3x3.cpp
+++ b/src/runtime/CL/functions/CLMedian3x3.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
-#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -41,5 +42,5 @@ void CLMedian3x3::configure(const CLCompileContext &compile_context, ICLTensor *
auto k = arm_compute::support::cpp14::make_unique<CLMedian3x3Kernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLMinMaxLocation.cpp b/src/runtime/CL/functions/CLMinMaxLocation.cpp
index a27defe2f7..3ddd4d04ed 100644
--- a/src/runtime/CL/functions/CLMinMaxLocation.cpp
+++ b/src/runtime/CL/functions/CLMinMaxLocation.cpp
@@ -22,14 +22,15 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h"
-
#include "arm_compute/core/CL/CLHelpers.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLMinMaxLocation::CLMinMaxLocation()
- : _min_max_kernel(),
- _min_max_loc_kernel(),
+ : _min_max_kernel(support::cpp14::make_unique<CLMinMaxKernel>()),
+ _min_max_loc_kernel(support::cpp14::make_unique<CLMinMaxLocationKernel>()),
_min_max_vals(),
_min_max_count_vals(),
_min(nullptr),
@@ -41,6 +42,8 @@ CLMinMaxLocation::CLMinMaxLocation()
{
}
+CLMinMaxLocation::~CLMinMaxLocation() = default;
+
void CLMinMaxLocation::configure(const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count)
{
configure(CLKernelLibrary::get().get_compile_context(), input, min, max, min_loc, max_loc, min_count, max_count);
@@ -62,16 +65,16 @@ void CLMinMaxLocation::configure(const CLCompileContext &compile_context, const
_min_loc = min_loc;
_max_loc = max_loc;
- _min_max_kernel.configure(compile_context, input, &_min_max_vals);
- _min_max_loc_kernel.configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc);
+ _min_max_kernel->configure(compile_context, input, &_min_max_vals);
+ _min_max_loc_kernel->configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc);
}
void CLMinMaxLocation::run()
{
cl::CommandQueue q = CLScheduler::get().queue();
- CLScheduler::get().enqueue(_min_max_kernel, false);
- CLScheduler::get().enqueue(_min_max_loc_kernel, false);
+ CLScheduler::get().enqueue(*_min_max_kernel, false);
+ CLScheduler::get().enqueue(*_min_max_loc_kernel, false);
// Update min and max
q.enqueueReadBuffer(_min_max_vals, CL_FALSE, 0 * sizeof(int32_t), sizeof(int32_t), static_cast<int32_t *>(_min));
diff --git a/src/runtime/CL/functions/CLNonLinearFilter.cpp b/src/runtime/CL/functions/CLNonLinearFilter.cpp
index 71f08e8072..3312f6f9a7 100644
--- a/src/runtime/CL/functions/CLNonLinearFilter.cpp
+++ b/src/runtime/CL/functions/CLNonLinearFilter.cpp
@@ -23,7 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -42,5 +43,5 @@ void CLNonLinearFilter::configure(const CLCompileContext &compile_context, ICLTe
auto k = arm_compute::support::cpp14::make_unique<CLNonLinearFilterKernel>();
k->configure(compile_context, input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp b/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp
index a79bb0c5a3..22ca176a71 100644
--- a/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp
+++ b/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp
@@ -23,7 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -43,10 +44,10 @@ void CLNonMaximaSuppression3x3::configure(const CLCompileContext &compile_contex
if(border_mode != BorderMode::UNDEFINED)
{
- _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT);
}
else
{
- _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED);
}
}
diff --git a/src/runtime/CL/functions/CLNormalizationLayer.cpp b/src/runtime/CL/functions/CLNormalizationLayer.cpp
index 4be6257bbf..40a6cdd2f4 100644
--- a/src/runtime/CL/functions/CLNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLNormalizationLayer.cpp
@@ -25,18 +25,25 @@
#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLNormalizationLayer::CLNormalizationLayer()
- : _norm_kernel(), _border_handler()
+ : _norm_kernel(support::cpp14::make_unique<CLNormalizationLayerKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
+CLNormalizationLayer::~CLNormalizationLayer() = default;
+
void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info);
@@ -47,10 +54,10 @@ void CLNormalizationLayer::configure(const CLCompileContext &compile_context, IC
ARM_COMPUTE_ERROR_ON(input == nullptr);
// Configure normalization kernel
- _norm_kernel.configure(compile_context, input, output, norm_info);
+ _norm_kernel->configure(compile_context, input, output, norm_info);
// Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
- _border_handler.configure(compile_context, input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue());
+ _border_handler->configure(compile_context, input, _norm_kernel->border_size(), BorderMode::CONSTANT, PixelValue());
}
Status CLNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
@@ -61,8 +68,8 @@ Status CLNormalizationLayer::validate(const ITensorInfo *input, const ITensorInf
void CLNormalizationLayer::run()
{
// Run border handler
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
// Run normalization kernel
- CLScheduler::get().enqueue(_norm_kernel);
+ CLScheduler::get().enqueue(*_norm_kernel);
}
diff --git a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
index 806e6489a2..9576486db0 100644
--- a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
+++ b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h"
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLOpticalFlow.cpp b/src/runtime/CL/functions/CLOpticalFlow.cpp
index 0b5547eaab..fca6192296 100644
--- a/src/runtime/CL/functions/CLOpticalFlow.cpp
+++ b/src/runtime/CL/functions/CLOpticalFlow.cpp
@@ -24,7 +24,6 @@
#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Window.h"
@@ -33,6 +32,8 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLLKTrackerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
@@ -42,7 +43,7 @@ CLOpticalFlow::CLOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager) //
_tracker_init_kernel(),
_tracker_stage0_kernel(),
_tracker_stage1_kernel(),
- _tracker_finalize_kernel(),
+ _tracker_finalize_kernel(support::cpp14::make_unique<CLLKTrackerFinalizeKernel>()),
_func_scharr(),
_scharr_gx(),
_scharr_gy(),
@@ -57,6 +58,8 @@ CLOpticalFlow::CLOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager) //
{
}
+CLOpticalFlow::~CLOpticalFlow() = default;
+
void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
@@ -93,9 +96,9 @@ void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLP
const int old_values_list_length = list_length * window_dimension * window_dimension;
// Create kernels and tensors
- _tracker_init_kernel.resize(_num_levels);
- _tracker_stage0_kernel.resize(_num_levels);
- _tracker_stage1_kernel.resize(_num_levels);
+ _tracker_init_kernel.reserve(_num_levels);
+ _tracker_stage0_kernel.reserve(_num_levels);
+ _tracker_stage1_kernel.reserve(_num_levels);
_func_scharr.resize(_num_levels);
_scharr_gx.resize(_num_levels);
_scharr_gy.resize(_num_levels);
@@ -134,16 +137,19 @@ void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLP
_func_scharr[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value);
// Init Lucas-Kanade init kernel
- _tracker_init_kernel[i].configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale);
+ _tracker_init_kernel.emplace_back(support::cpp14::make_unique<CLLKTrackerInitKernel>());
+ _tracker_init_kernel.back()->configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale);
// Init Lucas-Kanade stage0 kernel
- _tracker_stage0_kernel[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i],
- _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
- window_dimension, i);
+ _tracker_stage0_kernel.emplace_back(support::cpp14::make_unique<CLLKTrackerStage0Kernel>());
+ _tracker_stage0_kernel.back()->configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i],
+ _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
+ window_dimension, i);
// Init Lucas-Kanade stage1 kernel
- _tracker_stage1_kernel[i].configure(compile_context, new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
- termination, epsilon, num_iterations, window_dimension, i);
+ _tracker_stage1_kernel.emplace_back(support::cpp14::make_unique<CLLKTrackerStage1Kernel>());
+ _tracker_stage1_kernel.back()->configure(compile_context, new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
+ termination, epsilon, num_iterations, window_dimension, i);
// Allocate intermediate buffers
_scharr_gx[i].allocator()->allocate();
@@ -151,7 +157,7 @@ void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLP
}
// Finalize Lucas-Kanade
- _tracker_finalize_kernel.configure(compile_context, _new_points_internal.get(), new_points);
+ _tracker_finalize_kernel->configure(compile_context, _new_points_internal.get(), new_points);
}
void CLOpticalFlow::run()
@@ -166,14 +172,14 @@ void CLOpticalFlow::run()
_func_scharr[level - 1].run();
// Run Lucas-Kanade init kernel
- CLScheduler::get().enqueue(_tracker_init_kernel[level - 1]);
+ CLScheduler::get().enqueue(*_tracker_init_kernel[level - 1]);
// Run Lucas-Kanade stage0 kernel
- CLScheduler::get().enqueue(_tracker_stage0_kernel[level - 1]);
+ CLScheduler::get().enqueue(*_tracker_stage0_kernel[level - 1]);
// Run Lucas-Kanade stage1 kernel
- CLScheduler::get().enqueue(_tracker_stage1_kernel[level - 1]);
+ CLScheduler::get().enqueue(*_tracker_stage1_kernel[level - 1]);
}
- CLScheduler::get().enqueue(_tracker_finalize_kernel, true);
+ CLScheduler::get().enqueue(*_tracker_finalize_kernel, true);
}
diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp
index aaddd46071..60cf4d1a2d 100644
--- a/src/runtime/CL/functions/CLPReluLayer.cpp
+++ b/src/runtime/CL/functions/CLPReluLayer.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
diff --git a/src/runtime/CL/functions/CLPadLayer.cpp b/src/runtime/CL/functions/CLPadLayer.cpp
index fb6078cc79..388b07b76e 100644
--- a/src/runtime/CL/functions/CLPadLayer.cpp
+++ b/src/runtime/CL/functions/CLPadLayer.cpp
@@ -22,14 +22,21 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLPadLayer.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLPadLayer::CLPadLayer()
- : _pad_kernel(), _copy_kernel(), _perform_pad(false)
+ : _pad_kernel(support::cpp14::make_unique<CLPadLayerKernel>()),
+ _copy_kernel(support::cpp14::make_unique<CLCopyKernel>()),
+ _perform_pad(false)
{
}
+CLPadLayer::~CLPadLayer() = default;
+
void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode);
@@ -46,12 +53,12 @@ void CLPadLayer::configure(const CLCompileContext &compile_context, ICLTensor *i
if(_perform_pad)
{
- _pad_kernel.configure(compile_context, input, output, padding, constant_value, mode);
+ _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode);
}
else
{
// Copy the input to the whole output if no padding is applied
- _copy_kernel.configure(compile_context, input, output);
+ _copy_kernel->configure(compile_context, input, output);
}
}
Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
@@ -75,11 +82,11 @@ void CLPadLayer::run()
{
if(_perform_pad)
{
- CLScheduler::get().enqueue(_pad_kernel);
+ CLScheduler::get().enqueue(*_pad_kernel);
}
else
{
- CLScheduler::get().enqueue(_copy_kernel);
+ CLScheduler::get().enqueue(*_copy_kernel);
}
}
} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/CL/functions/CLPermute.cpp b/src/runtime/CL/functions/CLPermute.cpp
index e13046bd46..f7f0bc4f5d 100644
--- a/src/runtime/CL/functions/CLPermute.cpp
+++ b/src/runtime/CL/functions/CLPermute.cpp
@@ -24,8 +24,8 @@
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
#include "arm_compute/core/Error.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLPhase.cpp b/src/runtime/CL/functions/CLPhase.cpp
index 64d2e0fdff..6594cd5bac 100644
--- a/src/runtime/CL/functions/CLPhase.cpp
+++ b/src/runtime/CL/functions/CLPhase.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLPhase.h"
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
index 883ce68536..12cc5d60af 100644
--- a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
+++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
@@ -24,8 +24,9 @@
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -55,7 +56,7 @@ ITensorPack select_border_input(ITensorPack &tensors)
namespace experimental
{
CLPixelWiseMultiplication::CLPixelWiseMultiplication()
- : _border_handler()
+ : _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
@@ -72,7 +73,7 @@ void CLPixelWiseMultiplication::configure(const CLCompileContext &compile_contex
if(broadcasted_info->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
@@ -86,12 +87,12 @@ Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen
void CLPixelWiseMultiplication::run(ITensorPack &tensors)
{
auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
+ CLScheduler::get().enqueue_op(*_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication()
- : _border_handler()
+ : _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
@@ -107,7 +108,7 @@ void CLComplexPixelWiseMultiplication::configure(const CLCompileContext &compile
if(broadcasted_info->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
@@ -120,7 +121,7 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con
void CLComplexPixelWiseMultiplication::run(ITensorPack &tensors)
{
auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
+ CLScheduler::get().enqueue_op(*_border_handler, border_pack);
ICLOperator::run(tensors);
}
} // namespace experimental
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index a14818fffe..7f99aee9ba 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -24,8 +24,9 @@
#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPoolingLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
@@ -79,7 +80,7 @@ void CLPoolingLayer::configure(const CLCompileContext &compile_context, ICLTenso
default:
ARM_COMPUTE_ERROR("Data layout not supported");
}
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, pixel_value);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, pixel_value);
// Tune kernels
CLScheduler::get().tune_kernel_static(*_kernel);
diff --git a/src/runtime/CL/functions/CLPriorBoxLayer.cpp b/src/runtime/CL/functions/CLPriorBoxLayer.cpp
index fefbff639d..8cb971793e 100644
--- a/src/runtime/CL/functions/CLPriorBoxLayer.cpp
+++ b/src/runtime/CL/functions/CLPriorBoxLayer.cpp
@@ -24,13 +24,13 @@
#include "arm_compute/runtime/CL/functions/CLPriorBoxLayer.h"
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 2d21d210e4..54df5a0a5e 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -30,7 +30,18 @@
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -86,10 +97,50 @@ void CLQLSTMLayer::TensorCopyKernel::run()
}
CLQLSTMLayer::CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _input_to_input_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_input_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _input_to_forget_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_forget_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _input_to_cell_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_cell_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _input_to_output_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_output_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _projection_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _layer_norms(),
+ _copy_output(support::cpp14::make_unique<CLCopyKernel>())
{
+ for(auto &norm : _layer_norms)
+ {
+ norm = support::cpp14::make_unique<CLQLSTMLayerNormalizationKernel>();
+ }
+
_memory_group = MemoryGroup(std::move(memory_manager));
}
+CLQLSTMLayer::~CLQLSTMLayer() = default;
+
+void CLQLSTMLayer::configure_layer_norm(LayerNormGate g, const ICLTensor *in)
+{
+ ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
+
+ CLTensor *out = &get_layer_norm_output(g);
+ _memory_group.manage(out);
+ out->allocator()->init(*(in->info()));
+
+ get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
+}
+
+Status CLQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
+{
+ // Output quantization scale will be different, but ignored here
+ // since it will be configured at configure() stage.
+ const TensorInfo out
+ {
+ in
+ };
+ return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
+}
+
void CLQLSTMLayer::configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias,
CLTensor *mm_res, CLTensor *outstage_res, float gemmlowp_scale,
@@ -200,18 +251,18 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT
_input_to_input_weights = lstm_params.input_to_input_weights();
_recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
- _input_to_input_reduction.configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_input_reduction.configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_input_reduction->configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_input_reduction->configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
}
- _input_to_forget_reduction.configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_forget_reduction.configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_cell_reduction.configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_cell_reduction.configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_output_reduction.configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_output_reduction.configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_forget_reduction->configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_forget_reduction->configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_cell_reduction->configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_cell_reduction->configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_output_reduction->configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_output_reduction->configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
if(_has_projection)
{
- _projection_reduction.configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+ _projection_reduction->configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
if(_projection_bias != nullptr)
{
_projection_bias_add.configure(compile_context, _projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
@@ -543,7 +594,7 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT
}
// Copy output_state_out to output
- _copy_output.configure(compile_context, output_state_out, output);
+ _copy_output->configure(compile_context, output_state_out, output);
}
Status CLQLSTMLayer::validate(const ITensorInfo *input,
@@ -1049,7 +1100,7 @@ void CLQLSTMLayer::run()
}
// Copy output_state_out to output
- CLScheduler::get().enqueue(_copy_output);
+ CLScheduler::get().enqueue(*_copy_output);
}
void CLQLSTMLayer::prepare()
@@ -1081,8 +1132,8 @@ void CLQLSTMLayer::prepare()
{
_input_to_input_eff_bias.allocator()->allocate();
_recurrent_to_input_eff_bias.allocator()->allocate();
- CLScheduler::get().enqueue(_input_to_input_reduction);
- CLScheduler::get().enqueue(_recurrent_to_input_reduction);
+ CLScheduler::get().enqueue(*_input_to_input_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_input_reduction);
_input_to_input_weights_transposed.allocator()->allocate();
_recurrent_to_input_weights_transposed.allocator()->allocate();
@@ -1097,17 +1148,17 @@ void CLQLSTMLayer::prepare()
_recurrent_to_cell_eff_bias.allocator()->allocate();
_input_to_output_eff_bias.allocator()->allocate();
_recurrent_to_output_eff_bias.allocator()->allocate();
- CLScheduler::get().enqueue(_input_to_forget_reduction);
- CLScheduler::get().enqueue(_recurrent_to_forget_reduction);
- CLScheduler::get().enqueue(_input_to_cell_reduction);
- CLScheduler::get().enqueue(_recurrent_to_cell_reduction);
- CLScheduler::get().enqueue(_input_to_output_reduction);
- CLScheduler::get().enqueue(_recurrent_to_output_reduction);
+ CLScheduler::get().enqueue(*_input_to_forget_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_forget_reduction);
+ CLScheduler::get().enqueue(*_input_to_cell_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_cell_reduction);
+ CLScheduler::get().enqueue(*_input_to_output_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_output_reduction);
if(_has_projection)
{
_projection_eff_bias.allocator()->allocate();
- CLScheduler::get().enqueue(_projection_reduction);
+ CLScheduler::get().enqueue(*_projection_reduction);
if(_projection_bias != nullptr)
{
_projection_bias_add.run();
diff --git a/src/runtime/CL/functions/CLQuantizationLayer.cpp b/src/runtime/CL/functions/CLQuantizationLayer.cpp
index f0a446acab..f132547eb9 100644
--- a/src/runtime/CL/functions/CLQuantizationLayer.cpp
+++ b/src/runtime/CL/functions/CLQuantizationLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp
index 94e7f9440c..be3e539f98 100644
--- a/src/runtime/CL/functions/CLRNNLayer.cpp
+++ b/src/runtime/CL/functions/CLRNNLayer.cpp
@@ -28,17 +28,33 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
CLRNNLayer::CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
- _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(support::cpp14::make_unique<CLCopyKernel>()), _fully_connected_out(),
+ _gemm_output(), _add_output(), _is_prepared(false)
{
}
+CLRNNLayer::~CLRNNLayer() = default;
+
Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state,
const ITensorInfo *output, const ActivationLayerInfo &info)
{
@@ -107,7 +123,7 @@ void CLRNNLayer::configure(const CLCompileContext &compile_context, const ICLTen
_activation.configure(compile_context, &_add_output, hidden_state, info);
_add_output.allocator()->allocate();
- _copy_kernel.configure(compile_context, hidden_state, output);
+ _copy_kernel->configure(compile_context, hidden_state, output);
}
void CLRNNLayer::run()
@@ -122,7 +138,7 @@ void CLRNNLayer::run()
_activation.run();
// copy hidden out to output
- CLScheduler::get().enqueue(_copy_kernel);
+ CLScheduler::get().enqueue(*_copy_kernel);
}
void CLRNNLayer::prepare()
diff --git a/src/runtime/CL/functions/CLROIAlignLayer.cpp b/src/runtime/CL/functions/CLROIAlignLayer.cpp
index 2337cee33f..cf28a1a0fb 100644
--- a/src/runtime/CL/functions/CLROIAlignLayer.cpp
+++ b/src/runtime/CL/functions/CLROIAlignLayer.cpp
@@ -24,7 +24,8 @@
#include "arm_compute/runtime/CL/functions/CLROIAlignLayer.h"
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLROIPoolingLayer.cpp b/src/runtime/CL/functions/CLROIPoolingLayer.cpp
index cdf60ce04f..b0e6716cce 100644
--- a/src/runtime/CL/functions/CLROIPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLROIPoolingLayer.cpp
@@ -22,10 +22,8 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLROIPoolingLayer.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
diff --git a/src/runtime/CL/functions/CLRange.cpp b/src/runtime/CL/functions/CLRange.cpp
index 8bf2a0c43e..57b57bd305 100644
--- a/src/runtime/CL/functions/CLRange.cpp
+++ b/src/runtime/CL/functions/CLRange.cpp
@@ -24,10 +24,10 @@
#include "arm_compute/runtime/CL/functions/CLRange.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLRangeKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp
index 4ea7f7642f..b761dc2f99 100644
--- a/src/runtime/CL/functions/CLReduceMean.cpp
+++ b/src/runtime/CL/functions/CLReduceMean.cpp
@@ -24,11 +24,12 @@
#include "arm_compute/runtime/CL/functions/CLReduceMean.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 208371c45d..7423f4bc87 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -30,9 +30,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/Utils.h"
-
#include "support/MemorySupport.h"
namespace arm_compute
@@ -43,6 +44,8 @@ CLReductionOperation::CLReductionOperation(std::shared_ptr<IMemoryManager> memor
{
}
+CLReductionOperation::~CLReductionOperation() = default;
+
Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -211,7 +214,7 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC
}
// Configure reduction operation kernels
- _reduction_kernels_vector.resize(_num_of_stages);
+ _reduction_kernels_vector.reserve(_num_of_stages);
// Create temporary tensors
if(_is_serial)
@@ -221,11 +224,12 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC
_memory_group.manage(&_results_vector.back());
}
- _reduction_kernels_vector[0].configure(compile_context, input, output_internal, axis, op, 0);
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[0]->configure(compile_context, input, output_internal, axis, op, 0);
}
else
{
- _border_handlers_vector.resize(_num_of_stages);
+ _border_handlers_vector.reserve(_num_of_stages);
_memory_group.manage(&_results_vector[0]);
ReductionOperation first_kernel_op;
@@ -269,15 +273,23 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC
ARM_COMPUTE_ERROR("Not supported");
}
- _reduction_kernels_vector[0].configure(compile_context, input, &_results_vector[0], axis, first_kernel_op);
- _border_handlers_vector[0].configure(compile_context, input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[0]->configure(compile_context, input, &_results_vector[0], axis, first_kernel_op);
+
+ _border_handlers_vector.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _border_handlers_vector[0]->configure(compile_context, input, _reduction_kernels_vector[0]->border_size(), BorderMode::CONSTANT, pixelValue);
// Apply ReductionOperation on intermediate stages
for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
{
_memory_group.manage(&_results_vector[i]);
- _reduction_kernels_vector[i].configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
- _border_handlers_vector[i].configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
+
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[i]->configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
+
+ _border_handlers_vector.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _border_handlers_vector[i]->configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i]->border_size(), BorderMode::CONSTANT, pixelValue);
+
_results_vector[i - 1].allocator()->allocate();
}
@@ -290,8 +302,12 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC
_memory_group.manage(&_results_vector.back());
}
- _reduction_kernels_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
- _border_handlers_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
+
+ _border_handlers_vector.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _border_handlers_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage]->border_size(), BorderMode::CONSTANT, pixelValue);
+
_results_vector[last_stage - 1].allocator()->allocate();
}
@@ -308,14 +324,14 @@ void CLReductionOperation::run()
if(_is_serial)
{
- CLScheduler::get().enqueue(_reduction_kernels_vector[0], false);
+ CLScheduler::get().enqueue(*_reduction_kernels_vector[0], false);
}
else
{
for(unsigned int i = 0; i < _num_of_stages; ++i)
{
- CLScheduler::get().enqueue(_border_handlers_vector[i], false);
- CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
+ CLScheduler::get().enqueue(*_border_handlers_vector[i], false);
+ CLScheduler::get().enqueue(*_reduction_kernels_vector[i], false);
}
}
diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp
index 1e3d614402..6466c2843b 100644
--- a/src/runtime/CL/functions/CLRemap.cpp
+++ b/src/runtime/CL/functions/CLRemap.cpp
@@ -24,11 +24,12 @@
#include "arm_compute/runtime/CL/functions/CLRemap.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLRemapKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -53,5 +54,5 @@ void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *inpu
auto k = arm_compute::support::cpp14::make_unique<CLRemapKernel>();
k->configure(compile_context, input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLReorgLayer.cpp b/src/runtime/CL/functions/CLReorgLayer.cpp
index 1dc41aefb5..4b2f70334f 100644
--- a/src/runtime/CL/functions/CLReorgLayer.cpp
+++ b/src/runtime/CL/functions/CLReorgLayer.cpp
@@ -24,10 +24,10 @@
#include "arm_compute/runtime/CL/functions/CLReorgLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLReorgLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLReshapeLayer.cpp b/src/runtime/CL/functions/CLReshapeLayer.cpp
index 273a761a0a..5112064b23 100644
--- a/src/runtime/CL/functions/CLReshapeLayer.cpp
+++ b/src/runtime/CL/functions/CLReshapeLayer.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
#include "support/MemorySupport.h"
/** [CLReshapeLayer snippet] **/
diff --git a/src/runtime/CL/functions/CLReverse.cpp b/src/runtime/CL/functions/CLReverse.cpp
index 213fbc8f32..b73d8de62e 100644
--- a/src/runtime/CL/functions/CLReverse.cpp
+++ b/src/runtime/CL/functions/CLReverse.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLReverse.h"
-#include "arm_compute/core/CL/kernels/CLReverseKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLReverseKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLScale.cpp b/src/runtime/CL/functions/CLScale.cpp
index e111c6d1f7..383b0cc305 100644
--- a/src/runtime/CL/functions/CLScale.cpp
+++ b/src/runtime/CL/functions/CLScale.cpp
@@ -24,10 +24,11 @@
#include "arm_compute/runtime/CL/functions/CLScale.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
@@ -60,7 +61,7 @@ void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *inpu
{
border_mode_to_use = BorderMode::CONSTANT;
}
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode_to_use, info.constant_border_value);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode_to_use, info.constant_border_value);
}
void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value,
diff --git a/src/runtime/CL/functions/CLScharr3x3.cpp b/src/runtime/CL/functions/CLScharr3x3.cpp
index b121ee7b99..e5d0d2d630 100644
--- a/src/runtime/CL/functions/CLScharr3x3.cpp
+++ b/src/runtime/CL/functions/CLScharr3x3.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
-#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -41,5 +42,5 @@ void CLScharr3x3::configure(const CLCompileContext &compile_context, ICLTensor *
auto k = arm_compute::support::cpp14::make_unique<CLScharr3x3Kernel>();
k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLSelect.cpp b/src/runtime/CL/functions/CLSelect.cpp
index ef8010847b..374da91b78 100644
--- a/src/runtime/CL/functions/CLSelect.cpp
+++ b/src/runtime/CL/functions/CLSelect.cpp
@@ -23,9 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLSelect.h"
-#include "arm_compute/core/CL/kernels/CLSelectKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLSelectKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp
index f36550ba91..940540563a 100644
--- a/src/runtime/CL/functions/CLSlice.cpp
+++ b/src/runtime/CL/functions/CLSlice.cpp
@@ -24,9 +24,9 @@
#include "arm_compute/runtime/CL/functions/CLSlice.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLSobel3x3.cpp b/src/runtime/CL/functions/CLSobel3x3.cpp
index 566a4a1534..78376f935a 100644
--- a/src/runtime/CL/functions/CLSobel3x3.cpp
+++ b/src/runtime/CL/functions/CLSobel3x3.cpp
@@ -23,14 +23,17 @@
*/
#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
using namespace arm_compute;
+CLSobel3x3::~CLSobel3x3() = default;
+
void CLSobel3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
@@ -41,5 +44,5 @@ void CLSobel3x3::configure(const CLCompileContext &compile_context, ICLTensor *i
auto k = arm_compute::support::cpp14::make_unique<CLSobel3x3Kernel>();
k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLSobel5x5.cpp b/src/runtime/CL/functions/CLSobel5x5.cpp
index f70e4f36f5..fa5d8945fb 100644
--- a/src/runtime/CL/functions/CLSobel5x5.cpp
+++ b/src/runtime/CL/functions/CLSobel5x5.cpp
@@ -24,20 +24,29 @@
#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLSobel5x5::CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
+ : _memory_group(std::move(memory_manager)),
+ _sobel_hor(support::cpp14::make_unique<CLSobel5x5HorKernel>()),
+ _sobel_vert(support::cpp14::make_unique<CLSobel5x5VertKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _tmp_x(),
+ _tmp_y()
{
}
+CLSobel5x5::~CLSobel5x5() = default;
+
void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
@@ -58,8 +67,8 @@ void CLSobel5x5::configure(const CLCompileContext &compile_context, ICLTensor *i
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
@@ -67,27 +76,27 @@ void CLSobel5x5::configure(const CLCompileContext &compile_context, ICLTensor *i
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void CLSobel5x5::run()
{
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_sobel_hor, false);
- CLScheduler::get().enqueue(_sobel_vert);
+ CLScheduler::get().enqueue(*_sobel_hor, false);
+ CLScheduler::get().enqueue(*_sobel_vert);
}
diff --git a/src/runtime/CL/functions/CLSobel7x7.cpp b/src/runtime/CL/functions/CLSobel7x7.cpp
index 792432e841..f462adb0ed 100644
--- a/src/runtime/CL/functions/CLSobel7x7.cpp
+++ b/src/runtime/CL/functions/CLSobel7x7.cpp
@@ -24,20 +24,29 @@
#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLSobel7x7::CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
+ : _memory_group(std::move(memory_manager)),
+ _sobel_hor(support::cpp14::make_unique<CLSobel7x7HorKernel>()),
+ _sobel_vert(support::cpp14::make_unique<CLSobel7x7VertKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _tmp_x(),
+ _tmp_y()
{
}
+CLSobel7x7::~CLSobel7x7() = default;
+
void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
@@ -58,8 +67,8 @@ void CLSobel7x7::configure(const CLCompileContext &compile_context, ICLTensor *i
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
@@ -67,27 +76,27 @@ void CLSobel7x7::configure(const CLCompileContext &compile_context, ICLTensor *i
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void CLSobel7x7::run()
{
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_sobel_hor, false);
- CLScheduler::get().enqueue(_sobel_vert);
+ CLScheduler::get().enqueue(*_sobel_hor, false);
+ CLScheduler::get().enqueue(*_sobel_vert);
}
diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
index 759c8706a1..4caf91488e 100644
--- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp
+++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
@@ -24,25 +24,39 @@
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "src/core/helpers/SoftmaxHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
template <bool IS_LOG>
CLSoftmaxLayerGeneric<IS_LOG>::CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp(), _input_permuted(), _output_permuted(),
+ : _memory_group(std::move(memory_manager)),
+ _permute_input(),
+ _permute_output(),
+ _max_shift_exp_sum_kernel(support::cpp14::make_unique<CLLogits1DMaxShiftExpSumKernel>()),
+ _norm_kernel(support::cpp14::make_unique<CLLogits1DNormKernel>()),
+ _max(),
+ _sum(),
+ _tmp(),
+ _input_permuted(),
+ _output_permuted(),
_needs_permute()
{
}
template <bool IS_LOG>
+CLSoftmaxLayerGeneric<IS_LOG>::~CLSoftmaxLayerGeneric() = default;
+
+template <bool IS_LOG>
void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor *output, float beta, int32_t axis)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, beta, axis);
@@ -78,7 +92,7 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure(const CLCompileContext &compile_co
_sum.allocator()->init(tmp_input->info()->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type));
// Set GPU target to kernels
- _max_shift_exp_sum_kernel.set_target(CLScheduler::get().target());
+ _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target());
// Manage intermediate buffers
_memory_group.manage(&_tmp);
@@ -91,8 +105,8 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure(const CLCompileContext &compile_co
softmax_info.input_data_type = tmp_input->info()->data_type();
// Configure kernels
- _max_shift_exp_sum_kernel.configure(compile_context, tmp_input, &_max, &_tmp, &_sum, softmax_info);
- _norm_kernel.configure(compile_context, &_tmp, &_sum, tmp_output, softmax_info);
+ _max_shift_exp_sum_kernel->configure(compile_context, tmp_input, &_max, &_tmp, &_sum, softmax_info);
+ _norm_kernel->configure(compile_context, &_tmp, &_sum, tmp_output, softmax_info);
// Allocate intermediate buffers
_tmp.allocator()->allocate();
@@ -156,8 +170,8 @@ void CLSoftmaxLayerGeneric<IS_LOG>::run()
_permute_input.run();
}
- CLScheduler::get().enqueue(_max_shift_exp_sum_kernel, false);
- CLScheduler::get().enqueue(_norm_kernel, !_needs_permute);
+ CLScheduler::get().enqueue(*_max_shift_exp_sum_kernel, false);
+ CLScheduler::get().enqueue(*_norm_kernel, !_needs_permute);
if(_needs_permute)
{
diff --git a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
index eea3cb535f..e83def5677 100644
--- a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
+++ b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
@@ -29,14 +29,21 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLSpaceToBatchLayer::CLSpaceToBatchLayer()
- : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false)
+ : _space_to_batch_kernel(support::cpp14::make_unique<CLSpaceToBatchLayerKernel>()),
+ _memset_kernel(support::cpp14::make_unique<CLMemsetKernel>()),
+ _has_padding(false)
{
}
+CLSpaceToBatchLayer::~CLSpaceToBatchLayer() = default;
+
void CLSpaceToBatchLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output);
@@ -49,9 +56,9 @@ void CLSpaceToBatchLayer::configure(const CLCompileContext &compile_context, con
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
_has_padding = true;
- _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _memset_kernel->configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(compile_context, input, block_shape, paddings, output);
+ _space_to_batch_kernel->configure(compile_context, input, block_shape, paddings, output);
}
void CLSpaceToBatchLayer::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output)
@@ -67,9 +74,9 @@ void CLSpaceToBatchLayer::configure(const CLCompileContext &compile_context, con
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
_has_padding = true;
- _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _memset_kernel->configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(compile_context, input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+ _space_to_batch_kernel->configure(compile_context, input, block_shape_x, block_shape_y, padding_left, padding_right, output);
}
Status CLSpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
@@ -94,8 +101,8 @@ void CLSpaceToBatchLayer::run()
// Zero out output only if we have paddings
if(_has_padding)
{
- CLScheduler::get().enqueue(_memset_kernel, true);
+ CLScheduler::get().enqueue(*_memset_kernel, true);
}
- CLScheduler::get().enqueue(_space_to_batch_kernel, true);
+ CLScheduler::get().enqueue(*_space_to_batch_kernel, true);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp b/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp
index 06aa92d6fa..db8c4953cc 100644
--- a/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp
+++ b/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp
@@ -29,14 +29,18 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLSpaceToDepthLayer::CLSpaceToDepthLayer()
- : _space_to_depth_kernel()
+ : _space_to_depth_kernel(support::cpp14::make_unique<CLSpaceToDepthLayerKernel>())
{
}
+CLSpaceToDepthLayer::~CLSpaceToDepthLayer() = default;
+
void CLSpaceToDepthLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape);
@@ -44,7 +48,7 @@ void CLSpaceToDepthLayer::configure(const ICLTensor *input, ICLTensor *output, i
void CLSpaceToDepthLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
- _space_to_depth_kernel.configure(compile_context, input, output, block_shape);
+ _space_to_depth_kernel->configure(compile_context, input, output, block_shape);
}
Status CLSpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
@@ -54,6 +58,6 @@ Status CLSpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo
void CLSpaceToDepthLayer::run()
{
- CLScheduler::get().enqueue(_space_to_depth_kernel, true);
+ CLScheduler::get().enqueue(*_space_to_depth_kernel, true);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLStackLayer.cpp b/src/runtime/CL/functions/CLStackLayer.cpp
index 39f0ab4779..f4aa78a72d 100644
--- a/src/runtime/CL/functions/CLStackLayer.cpp
+++ b/src/runtime/CL/functions/CLStackLayer.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLStackLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -42,6 +44,8 @@ CLStackLayer::CLStackLayer() // NOLINT
{
}
+CLStackLayer::~CLStackLayer() = default;
+
void CLStackLayer::configure(const std::vector<ICLTensor *> &input, int axis, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, axis, output);
@@ -50,14 +54,15 @@ void CLStackLayer::configure(const std::vector<ICLTensor *> &input, int axis, IC
void CLStackLayer::configure(const CLCompileContext &compile_context, const std::vector<ICLTensor *> &input, int axis, ICLTensor *output)
{
_num_inputs = input.size();
- _stack_kernels.resize(_num_inputs);
+ _stack_kernels.reserve(_num_inputs);
// Wrap around negative values
const unsigned int axis_u = wrap_around(axis, static_cast<int>(input[0]->info()->num_dimensions() + 1));
for(unsigned int i = 0; i < _num_inputs; i++)
{
- _stack_kernels[i].configure(compile_context, input[i], axis_u, i, _num_inputs, output);
+ _stack_kernels.emplace_back(support::cpp14::make_unique<CLStackLayerKernel>());
+ _stack_kernels.back()->configure(compile_context, input[i], axis_u, i, _num_inputs, output);
}
}
@@ -87,7 +92,7 @@ void CLStackLayer::run()
{
for(unsigned i = 0; i < _num_inputs; i++)
{
- CLScheduler::get().enqueue(_stack_kernels[i], false);
+ CLScheduler::get().enqueue(*_stack_kernels[i], false);
}
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp
index b78073dd67..3f6814f5ce 100644
--- a/src/runtime/CL/functions/CLStridedSlice.cpp
+++ b/src/runtime/CL/functions/CLStridedSlice.cpp
@@ -24,8 +24,8 @@
#include "arm_compute/runtime/CL/functions/CLStridedSlice.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLTableLookup.cpp b/src/runtime/CL/functions/CLTableLookup.cpp
index 3d2d1853ca..8282f37e4b 100644
--- a/src/runtime/CL/functions/CLTableLookup.cpp
+++ b/src/runtime/CL/functions/CLTableLookup.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLThreshold.cpp b/src/runtime/CL/functions/CLThreshold.cpp
index bdbf37e841..250f6f034f 100644
--- a/src/runtime/CL/functions/CLThreshold.cpp
+++ b/src/runtime/CL/functions/CLThreshold.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLThreshold.h"
-#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
+#include "src/core/CL/kernels/CLThresholdKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLTile.cpp b/src/runtime/CL/functions/CLTile.cpp
index 68efad0125..8384e48baf 100644
--- a/src/runtime/CL/functions/CLTile.cpp
+++ b/src/runtime/CL/functions/CLTile.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLTile.h"
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
+#include "src/core/CL/kernels/CLTileKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLTranspose.cpp b/src/runtime/CL/functions/CLTranspose.cpp
index 8cade66a90..43fa7a012a 100644
--- a/src/runtime/CL/functions/CLTranspose.cpp
+++ b/src/runtime/CL/functions/CLTranspose.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/CL/functions/CLUpsampleLayer.cpp b/src/runtime/CL/functions/CLUpsampleLayer.cpp
index e9456c100b..10b4b76a5e 100644
--- a/src/runtime/CL/functions/CLUpsampleLayer.cpp
+++ b/src/runtime/CL/functions/CLUpsampleLayer.cpp
@@ -26,15 +26,19 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLUpsampleLayer::CLUpsampleLayer() // NOLINT
- : _upsample(),
+ : _upsample(support::cpp14::make_unique<CLUpsampleLayerKernel>()),
_output(nullptr)
{
}
+CLUpsampleLayer::~CLUpsampleLayer() = default;
+
Status CLUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
const Size2D &info, const InterpolationPolicy upsampling_policy)
{
@@ -53,11 +57,11 @@ void CLUpsampleLayer::configure(const CLCompileContext &compile_context, ICLTens
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
_output = output;
- _upsample.configure(compile_context, input, _output, info, upsampling_policy);
+ _upsample->configure(compile_context, input, _output, info, upsampling_policy);
}
void CLUpsampleLayer::run()
{
- CLScheduler::get().enqueue(_upsample, false);
+ CLScheduler::get().enqueue(*_upsample, false);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLWarpAffine.cpp b/src/runtime/CL/functions/CLWarpAffine.cpp
index fffc58c8d0..86e5a7bd86 100644
--- a/src/runtime/CL/functions/CLWarpAffine.cpp
+++ b/src/runtime/CL/functions/CLWarpAffine.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
-#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -42,5 +43,5 @@ void CLWarpAffine::configure(const CLCompileContext &compile_context, ICLTensor
auto k = arm_compute::support::cpp14::make_unique<CLWarpAffineKernel>();
k->configure(compile_context, input, output, matrix, policy);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLWarpPerspective.cpp b/src/runtime/CL/functions/CLWarpPerspective.cpp
index 2b4b187e38..7e8bc5cdff 100644
--- a/src/runtime/CL/functions/CLWarpPerspective.cpp
+++ b/src/runtime/CL/functions/CLWarpPerspective.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -42,5 +43,5 @@ void CLWarpPerspective::configure(const CLCompileContext &compile_context, ICLTe
auto k = arm_compute::support::cpp14::make_unique<CLWarpPerspectiveKernel>();
k->configure(compile_context, input, output, matrix, policy);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 7ad017f918..7af42904e8 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -28,6 +28,15 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
@@ -90,11 +99,13 @@ bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_siz
} // namespace
CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _input0(), _input1(), _batched_mm_output(), _original_weights(nullptr),
- _is_prepared(false)
+ : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(support::cpp14::make_unique<CLWinogradFilterTransformKernel>()),
+ _output_transform(support::cpp14::make_unique<CLWinogradOutputTransformKernel>()), _input0(), _input1(), _batched_mm_output(), _original_weights(nullptr), _is_prepared(false)
{
}
+CLWinogradConvolutionLayer::~CLWinogradConvolutionLayer() = default;
+
void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info,
bool enable_fast_math)
{
@@ -139,7 +150,7 @@ void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_conte
_input_transform.configure(compile_context, input, &_input0, winograd_info);
// Configure filter transform
- _filter_transform.configure(compile_context, weights, &_input1, winograd_info);
+ _filter_transform->configure(compile_context, weights, &_input1, winograd_info);
// Configure batched matrix multiply
_batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false,
@@ -147,7 +158,7 @@ void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_conte
(input->info()->data_type() == DataType::F16)));
// Configure output transform
- _output_transform.configure(compile_context, &_batched_mm_output, biases, output, winograd_info, act_info);
+ _output_transform->configure(compile_context, &_batched_mm_output, biases, output, winograd_info, act_info);
// Allocate temporary tensors
_input0.allocator()->allocate();
@@ -218,7 +229,7 @@ void CLWinogradConvolutionLayer::run()
_batched_mm.run();
// Run output transform
- CLScheduler::get().enqueue(_output_transform);
+ CLScheduler::get().enqueue(*_output_transform);
}
void CLWinogradConvolutionLayer::prepare()
@@ -227,7 +238,7 @@ void CLWinogradConvolutionLayer::prepare()
{
// Run filter transform and mark original weights as unused
_input1.allocator()->allocate();
- CLScheduler::get().enqueue(_filter_transform, false);
+ CLScheduler::get().enqueue(*_filter_transform, false);
_original_weights->mark_as_unused();
// Prepare GEMM and release reshaped weights if marked unused by CLGEMM
diff --git a/src/runtime/CL/functions/CLWinogradInputTransform.cpp b/src/runtime/CL/functions/CLWinogradInputTransform.cpp
index 9498206549..308c41f714 100644
--- a/src/runtime/CL/functions/CLWinogradInputTransform.cpp
+++ b/src/runtime/CL/functions/CLWinogradInputTransform.cpp
@@ -24,8 +24,9 @@
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "arm_compute/core/Error.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
@@ -40,7 +41,7 @@ void CLWinogradInputTransform::configure(const CLCompileContext &compile_context
auto k = arm_compute::support::cpp14::make_unique<CLWinogradInputTransformKernel>();
k->configure(compile_context, input, output, winograd_info);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
+ _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
}
Status CLWinogradInputTransform::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
diff --git a/src/runtime/CL/functions/CLYOLOLayer.cpp b/src/runtime/CL/functions/CLYOLOLayer.cpp
index d553f97009..46bf220b0c 100644
--- a/src/runtime/CL/functions/CLYOLOLayer.cpp
+++ b/src/runtime/CL/functions/CLYOLOLayer.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLYOLOLayer.h"
-#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLYOLOLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp
index a6474c9835..9490e0b219 100644
--- a/src/runtime/CL/tuners/BifrostTuner.cpp
+++ b/src/runtime/CL/tuners/BifrostTuner.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/CL/tuners/BifrostTuner.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernels.h"
+#include "src/core/CL/CLKernels.h"
#include "support/Cast.h"
namespace arm_compute
diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp
index 58b0d579d2..72734f2207 100644
--- a/src/runtime/CL/tuners/MidgardTuner.cpp
+++ b/src/runtime/CL/tuners/MidgardTuner.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/CL/tuners/MidgardTuner.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernels.h"
+#include "src/core/CL/CLKernels.h"
#include "support/Cast.h"
namespace arm_compute
diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h
index e0d584c5ce..e548af4938 100644
--- a/tests/CL/Helper.h
+++ b/tests/CL/Helper.h
@@ -24,13 +24,15 @@
#ifndef ARM_COMPUTE_TEST_CL_HELPER_H
#define ARM_COMPUTE_TEST_CL_HELPER_H
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/IFunction.h"
+
+#include "src/core/CL/ICLKernel.h"
+
#include "support/MemorySupport.h"
namespace arm_compute
@@ -93,7 +95,7 @@ public:
auto k = arm_compute::support::cpp14::make_unique<K>();
k->configure(first, std::forward<Args>(args)...);
_kernel = std::move(k);
- _border_handler.configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
+ _border_handler->configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
}
};
diff --git a/tests/benchmark/CL/Scale.cpp b/tests/benchmark/CL/Scale.cpp
index 58727edcae..8a1ceb663e 100644
--- a/tests/benchmark/CL/Scale.cpp
+++ b/tests/benchmark/CL/Scale.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLScale.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/benchmark/fixtures/ScaleFixture.h"
#include "tests/datasets/BorderModeDataset.h"
diff --git a/tests/validate_examples/cl_gemm.cpp b/tests/validate_examples/cl_gemm.cpp
index 0e71f9d5bf..99f7513624 100644
--- a/tests/validate_examples/cl_gemm.cpp
+++ b/tests/validate_examples/cl_gemm.cpp
@@ -28,9 +28,24 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index 7dcd22e795..2508c63524 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -22,12 +22,11 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SplitDataset.h"
@@ -54,7 +53,8 @@ const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape",
TensorShape{ 2560, 2U, 2U, 2U },
});
-const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", { TensorShape{ 517U, 123U, 13U, 2U } });
+const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape",
+{ TensorShape{ 517U, 123U, 13U, 2U } });
} // namespace
TEST_SUITE(CL)
TEST_SUITE(ArgMinMax)
diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp
index e67f4cc199..88f00b0eff 100644
--- a/tests/validation/CL/BatchNormalizationLayer.cpp
+++ b/tests/validation/CL/BatchNormalizationLayer.cpp
@@ -58,10 +58,11 @@ const auto act_infos = framework::dataset::make("Activat
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
});
-const auto common_fusion_dataset = combine(combine(combine(framework::dataset::make("UseBias", { false, true }),
- framework::dataset::make("UseBeta", { false, true })),
- framework::dataset::make("UseGamma", { false, true })),
- framework::dataset::make("Epsilon", { 0.001f }));
+const auto common_fusion_dataset = combine(combine(combine(framework::dataset::make("UseBias",
+{ false, true }),
+framework::dataset::make("UseBeta", { false, true })),
+framework::dataset::make("UseGamma", { false, true })),
+framework::dataset::make("Epsilon", { 0.001f }));
bool validate_zero_padding(TensorShape shape0, const TensorShape shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout)
{
@@ -141,9 +142,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
// clang-format on
// *INDENT-ON*
-DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(),
- act_infos),
- framework::dataset::make("DataType", { DataType::F32, DataType::F16 })),
+DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), act_infos), framework::dataset::make("DataType", { DataType::F32, DataType::F16 })),
framework::dataset::make("DataLayout", { DataLayout::NHWC })),
shape0, shape1, episilon, act_infos, data_type, data_layout)
{
@@ -154,8 +153,7 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(),
- combine(framework::dataset::make("UseBeta", { false, true }),
- framework::dataset::make("UseGamma", { false, true }))),
+ combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))),
act_infos),
framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
@@ -167,9 +165,9 @@ TEST_SUITE_END() //FP32
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(),
- combine(framework::dataset::make("UseBeta", { false, true }),
- framework::dataset::make("UseGamma", { false, true }))),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
+ combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))),
+ framework::dataset::make("ActivationInfo",
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
diff --git a/tests/validation/CL/BatchToSpaceLayer.cpp b/tests/validation/CL/BatchToSpaceLayer.cpp
index f553787729..e90ac921c5 100644
--- a/tests/validation/CL/BatchToSpaceLayer.cpp
+++ b/tests/validation/CL/BatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/BoundingBoxTransform.cpp b/tests/validation/CL/BoundingBoxTransform.cpp
index 82dfa31606..2a7f1667d6 100644
--- a/tests/validation/CL/BoundingBoxTransform.cpp
+++ b/tests/validation/CL/BoundingBoxTransform.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/CannyEdge.cpp b/tests/validation/CL/CannyEdge.cpp
index 84c73c1a8c..10da5f4363 100644
--- a/tests/validation/CL/CannyEdge.cpp
+++ b/tests/validation/CL/CannyEdge.cpp
@@ -48,8 +48,9 @@ namespace
/* Allowed ratio of mismatches between target and reference (1.0 = 100%) */
const float allowed_mismatch_ratio = 0.1f;
-const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }),
- combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes()));
+const auto data = combine(framework::dataset::make("GradientSize",
+{ 3, 5, 7 }),
+combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes()));
} // namespace
TEST_SUITE(CL)
diff --git a/tests/validation/CL/ChannelCombine.cpp b/tests/validation/CL/ChannelCombine.cpp
index 6187e72960..7ef8414d7e 100644
--- a/tests/validation/CL/ChannelCombine.cpp
+++ b/tests/validation/CL/ChannelCombine.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/ChannelExtract.cpp b/tests/validation/CL/ChannelExtract.cpp
index 7657d5a7ea..7a0dcf3e7d 100644
--- a/tests/validation/CL/ChannelExtract.cpp
+++ b/tests/validation/CL/ChannelExtract.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/Col2Im.cpp b/tests/validation/CL/Col2Im.cpp
index d6ef010b53..b651bf8918 100644
--- a/tests/validation/CL/Col2Im.cpp
+++ b/tests/validation/CL/Col2Im.cpp
@@ -21,9 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
#include "arm_compute/core/Types.h"
-
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
diff --git a/tests/validation/CL/ConvertFullyConnectedWeights.cpp b/tests/validation/CL/ConvertFullyConnectedWeights.cpp
index a5065fb217..70d7b2c767 100644
--- a/tests/validation/CL/ConvertFullyConnectedWeights.cpp
+++ b/tests/validation/CL/ConvertFullyConnectedWeights.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/CropResize.cpp b/tests/validation/CL/CropResize.cpp
index 636db1728f..f1fae3d5cc 100644
--- a/tests/validation/CL/CropResize.cpp
+++ b/tests/validation/CL/CropResize.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLCropResize.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/CropResizeDataset.h"
#include "tests/framework/Asserts.h"
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index c677f5ae96..c284cdcee3 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
diff --git a/tests/validation/CL/DepthToSpaceLayer.cpp b/tests/validation/CL/DepthToSpaceLayer.cpp
index fd570ad753..7cee4b7129 100644
--- a/tests/validation/CL/DepthToSpaceLayer.cpp
+++ b/tests/validation/CL/DepthToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
index 058d9b3ecc..b1cd379574 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/Fill.cpp b/tests/validation/CL/Fill.cpp
index b86dae10fd..38950079da 100644
--- a/tests/validation/CL/Fill.cpp
+++ b/tests/validation/CL/Fill.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/FillBorder.cpp b/tests/validation/CL/FillBorder.cpp
index e0b283b56b..e2afd6494e 100644
--- a/tests/validation/CL/FillBorder.cpp
+++ b/tests/validation/CL/FillBorder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/datasets/BorderModeDataset.h"
diff --git a/tests/validation/CL/Flatten.cpp b/tests/validation/CL/Flatten.cpp
index a00041b0a4..04f720f7e5 100644
--- a/tests/validation/CL/Flatten.cpp
+++ b/tests/validation/CL/Flatten.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/Floor.cpp b/tests/validation/CL/Floor.cpp
index 58645b9d85..2961cfa3f2 100644
--- a/tests/validation/CL/Floor.cpp
+++ b/tests/validation/CL/Floor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/FuseBatchNormalization.cpp b/tests/validation/CL/FuseBatchNormalization.cpp
index 0736250727..548feab2ed 100644
--- a/tests/validation/CL/FuseBatchNormalization.cpp
+++ b/tests/validation/CL/FuseBatchNormalization.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp
index c9540c352a..392eeb1510 100644
--- a/tests/validation/CL/GEMM.cpp
+++ b/tests/validation/CL/GEMM.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
index ce000bd8e1..1cfeac59af 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
@@ -21,9 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
index 16e4a137eb..0c651cddc2 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
@@ -21,11 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
index d8618bd881..fa256280ca 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/GEMMMatrixMultiply.cpp b/tests/validation/CL/GEMMMatrixMultiply.cpp
index e521dd5a02..5d2e211d91 100644
--- a/tests/validation/CL/GEMMMatrixMultiply.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiply.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp b/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp
index fcbf8ce110..b2701e7f6c 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp
@@ -21,14 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
index 6ba5012d15..1cf1209dee 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
index 5629a80f8e..0a0a1fc397 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
@@ -21,14 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
index 33912ae2ba..789b77377d 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -21,13 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
index d9439f63f1..4af495944e 100644
--- a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
+++ b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
@@ -21,11 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
index 579ed32afe..14048e81ec 100644
--- a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
+++ b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
@@ -21,11 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/Gather.cpp b/tests/validation/CL/Gather.cpp
index 6126231d0d..f0b87d7d9f 100644
--- a/tests/validation/CL/Gather.cpp
+++ b/tests/validation/CL/Gather.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLGather.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/GatherDataset.h"
#include "tests/framework/Asserts.h"
diff --git a/tests/validation/CL/GlobalPoolingLayer.cpp b/tests/validation/CL/GlobalPoolingLayer.cpp
index 5328fc8448..246368e66d 100644
--- a/tests/validation/CL/GlobalPoolingLayer.cpp
+++ b/tests/validation/CL/GlobalPoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/HOGDescriptor.cpp b/tests/validation/CL/HOGDescriptor.cpp
index 7c014b5d22..c6b2763dfd 100644
--- a/tests/validation/CL/HOGDescriptor.cpp
+++ b/tests/validation/CL/HOGDescriptor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/HOGDetector.cpp b/tests/validation/CL/HOGDetector.cpp
index 78edf0fd27..9f74c728cf 100644
--- a/tests/validation/CL/HOGDetector.cpp
+++ b/tests/validation/CL/HOGDetector.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/HOGMultiDetection.cpp b/tests/validation/CL/HOGMultiDetection.cpp
index 091ff9e9db..5557fde33c 100644
--- a/tests/validation/CL/HOGMultiDetection.cpp
+++ b/tests/validation/CL/HOGMultiDetection.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/Im2Col.cpp b/tests/validation/CL/Im2Col.cpp
index 12b082fe13..e7e46b7bc5 100644
--- a/tests/validation/CL/Im2Col.cpp
+++ b/tests/validation/CL/Im2Col.cpp
@@ -21,9 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
#include "arm_compute/core/Types.h"
-
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
diff --git a/tests/validation/CL/InstanceNormalizationLayer.cpp b/tests/validation/CL/InstanceNormalizationLayer.cpp
index a30e3260c6..a52ebc5bfe 100644
--- a/tests/validation/CL/InstanceNormalizationLayer.cpp
+++ b/tests/validation/CL/InstanceNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp
index 9502df5ade..bcf68a526c 100644
--- a/tests/validation/CL/L2NormalizeLayer.cpp
+++ b/tests/validation/CL/L2NormalizeLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/LSTMLayerQuantized.cpp b/tests/validation/CL/LSTMLayerQuantized.cpp
index f975bfb196..fe533ee914 100644
--- a/tests/validation/CL/LSTMLayerQuantized.cpp
+++ b/tests/validation/CL/LSTMLayerQuantized.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,6 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/Utils.h"
diff --git a/tests/validation/CL/LogSoftmaxLayer.cpp b/tests/validation/CL/LogSoftmaxLayer.cpp
index 8fdc745d13..b7f6a66e42 100644
--- a/tests/validation/CL/LogSoftmaxLayer.cpp
+++ b/tests/validation/CL/LogSoftmaxLayer.cpp
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
diff --git a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp
index a355f9eb1c..e77a21ed7f 100644
--- a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp
+++ b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp
index 88949806d5..1aed2786ff 100644
--- a/tests/validation/CL/NormalizationLayer.cpp
+++ b/tests/validation/CL/NormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/OpticalFlow.cpp b/tests/validation/CL/OpticalFlow.cpp
index cf60038d4b..7c1ff5ed57 100644
--- a/tests/validation/CL/OpticalFlow.cpp
+++ b/tests/validation/CL/OpticalFlow.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/PriorBoxLayer.cpp b/tests/validation/CL/PriorBoxLayer.cpp
index c63b093844..780f4796fa 100644
--- a/tests/validation/CL/PriorBoxLayer.cpp
+++ b/tests/validation/CL/PriorBoxLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/QLSTMLayerNormalization.cpp b/tests/validation/CL/QLSTMLayerNormalization.cpp
index a927be17bb..1c7dee4612 100644
--- a/tests/validation/CL/QLSTMLayerNormalization.cpp
+++ b/tests/validation/CL/QLSTMLayerNormalization.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/RNNLayer.cpp b/tests/validation/CL/RNNLayer.cpp
index 4e67868943..23219bd7b0 100644
--- a/tests/validation/CL/RNNLayer.cpp
+++ b/tests/validation/CL/RNNLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/Range.cpp b/tests/validation/CL/Range.cpp
index bf81f55e41..c4e0e17aa0 100644
--- a/tests/validation/CL/Range.cpp
+++ b/tests/validation/CL/Range.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/ReduceMean.cpp b/tests/validation/CL/ReduceMean.cpp
index 1dc6c615a9..947f84af49 100644
--- a/tests/validation/CL/ReduceMean.cpp
+++ b/tests/validation/CL/ReduceMean.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLReduceMean.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SplitDataset.h"
diff --git a/tests/validation/CL/Reverse.cpp b/tests/validation/CL/Reverse.cpp
index ed2c6e337a..11df0e7803 100644
--- a/tests/validation/CL/Reverse.cpp
+++ b/tests/validation/CL/Reverse.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp
index 76289cab44..fe31b00e00 100644
--- a/tests/validation/CL/SoftmaxLayer.cpp
+++ b/tests/validation/CL/SoftmaxLayer.cpp
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
diff --git a/tests/validation/CL/SpaceToBatchLayer.cpp b/tests/validation/CL/SpaceToBatchLayer.cpp
index b2339399a3..971312e379 100644
--- a/tests/validation/CL/SpaceToBatchLayer.cpp
+++ b/tests/validation/CL/SpaceToBatchLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/SpaceToDepthLayer.cpp b/tests/validation/CL/SpaceToDepthLayer.cpp
index 25b4bcd70c..b9e767fb65 100644
--- a/tests/validation/CL/SpaceToDepthLayer.cpp
+++ b/tests/validation/CL/SpaceToDepthLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/TableLookup.cpp b/tests/validation/CL/TableLookup.cpp
index 93e313096c..f435c60c13 100644
--- a/tests/validation/CL/TableLookup.cpp
+++ b/tests/validation/CL/TableLookup.cpp
@@ -26,7 +26,6 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/CL/CLLutAccessor.h"
#include "tests/PaddingCalculator.h"
@@ -34,7 +33,6 @@
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
-
#include "tests/validation/Helpers.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/TableLookupFixture.h"
diff --git a/tests/validation/CL/Tile.cpp b/tests/validation/CL/Tile.cpp
index 73f4aa82a2..a06c05744f 100644
--- a/tests/validation/CL/Tile.cpp
+++ b/tests/validation/CL/Tile.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/CL/UNIT/DynamicTensor.cpp b/tests/validation/CL/UNIT/DynamicTensor.cpp
index b6302846a7..833256039e 100644
--- a/tests/validation/CL/UNIT/DynamicTensor.cpp
+++ b/tests/validation/CL/UNIT/DynamicTensor.cpp
@@ -28,6 +28,12 @@
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
diff --git a/tests/validation/CL/UNIT/Tuner.cpp b/tests/validation/CL/UNIT/Tuner.cpp
index ee5c76ce5f..cf2513bf2c 100644
--- a/tests/validation/CL/UNIT/Tuner.cpp
+++ b/tests/validation/CL/UNIT/Tuner.cpp
@@ -21,10 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/tuners/BifrostTuner.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "tests/Utils.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
diff --git a/tests/validation/CL/UNIT/WeightsRetention.cpp b/tests/validation/CL/UNIT/WeightsRetention.cpp
index 7234e47642..acf795e48b 100644
--- a/tests/validation/CL/UNIT/WeightsRetention.cpp
+++ b/tests/validation/CL/UNIT/WeightsRetention.cpp
@@ -22,6 +22,18 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp
index 3e7ecc3408..d04c10cee2 100644
--- a/tests/validation/CL/WeightsReshape.cpp
+++ b/tests/validation/CL/WeightsReshape.cpp
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index d1522f3e7f..f206e92493 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -21,8 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
@@ -30,6 +28,8 @@
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/YOLOLayer.cpp b/tests/validation/CL/YOLOLayer.cpp
index f28082b74b..95c18d3d95 100644
--- a/tests/validation/CL/YOLOLayer.cpp
+++ b/tests/validation/CL/YOLOLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*