From 68dd25fbe6e4d3c3513fa5993863419769aa08fc Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Mon, 19 Oct 2020 16:00:11 +0100 Subject: COMPMID-3637: Move utility headers from arm_compute to src Signed-off-by: Georgios Pinitas Change-Id: If9d6fa8c900b68c4b6fd373f2fc1f9abb83ea917 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4145 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Comments-Addressed: Arm Jenkins --- src/core/AccessWindowAutoPadding.cpp | 4 +- src/core/AccessWindowAutoPadding.h | 85 +++ src/core/AccessWindowStatic.cpp | 4 +- src/core/AccessWindowStatic.h | 101 +++ src/core/AccessWindowTranspose.cpp | 4 +- src/core/AccessWindowTranspose.h | 48 ++ src/core/CL/CLValidate.h | 61 ++ src/core/CL/ICLGEMMKernelConfiguration.h | 68 ++ src/core/CL/ICLKernel.cpp | 9 +- src/core/CL/ICLSimple2DKernel.cpp | 7 +- src/core/CL/ICLSimpleKernel.cpp | 3 +- src/core/CL/gemm/CLGEMMHelpers.cpp | 2 +- src/core/CL/gemm/CLGEMMHelpers.h | 73 ++ .../gemm/native/CLGEMMNativeKernelConfiguration.h | 65 ++ .../CLGEMMNativeKernelConfigurationBifrost.cpp | 4 +- .../CLGEMMNativeKernelConfigurationBifrost.h | 56 ++ .../CLGEMMNativeKernelConfigurationMidgard.cpp | 4 +- .../CLGEMMNativeKernelConfigurationMidgard.h | 51 ++ .../CLGEMMNativeKernelConfigurationValhall.cpp | 4 +- .../CLGEMMNativeKernelConfigurationValhall.h | 53 ++ .../reshaped/CLGEMMReshapedKernelConfiguration.h | 63 ++ .../CLGEMMReshapedKernelConfigurationBifrost.cpp | 30 +- .../CLGEMMReshapedKernelConfigurationBifrost.h | 56 ++ .../CLGEMMReshapedKernelConfigurationValhall.cpp | 4 +- .../CLGEMMReshapedKernelConfigurationValhall.h | 53 ++ .../CLGEMMReshapedOnlyRHSKernelConfiguration.h | 63 ++ ...MMReshapedOnlyRHSKernelConfigurationBifrost.cpp | 12 +- ...GEMMReshapedOnlyRHSKernelConfigurationBifrost.h | 59 ++ ...MMReshapedOnlyRHSKernelConfigurationValhall.cpp | 4 +- ...GEMMReshapedOnlyRHSKernelConfigurationValhall.h | 53 ++ src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp 
| 5 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 10 +- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 7 +- .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 6 +- .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 5 +- src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp | 4 +- src/core/CL/kernels/CLBitwiseAndKernel.cpp | 3 +- src/core/CL/kernels/CLBitwiseOrKernel.cpp | 3 +- src/core/CL/kernels/CLBitwiseXorKernel.cpp | 3 +- .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 7 +- src/core/CL/kernels/CLBox3x3Kernel.cpp | 3 +- src/core/CL/kernels/CLCannyEdgeKernel.cpp | 1 + src/core/CL/kernels/CLChannelCombineKernel.cpp | 4 +- src/core/CL/kernels/CLChannelExtractKernel.cpp | 5 +- .../CL/kernels/CLChannelShuffleLayerKernel.cpp | 6 +- src/core/CL/kernels/CLCol2ImKernel.cpp | 6 +- src/core/CL/kernels/CLColorConvertKernel.cpp | 4 +- src/core/CL/kernels/CLComparisonKernel.cpp | 4 +- .../CLConvertFullyConnectedWeightsKernel.cpp | 5 +- src/core/CL/kernels/CLConvolutionKernel.cpp | 2 +- src/core/CL/kernels/CLCopyKernel.cpp | 6 +- src/core/CL/kernels/CLCropKernel.cpp | 9 +- .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 5 +- .../kernels/CLDeconvolutionReshapeOutputKernel.cpp | 2 + .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 6 +- src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 4 +- src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 4 +- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 8 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 8 +- .../CLDepthwiseConvolutionLayerNativeKernel.cpp | 7 +- ...pthwiseConvolutionLayerReshapeWeightsKernel.cpp | 9 +- .../CL/kernels/CLDequantizationLayerKernel.cpp | 7 +- src/core/CL/kernels/CLDerivativeKernel.cpp | 4 +- src/core/CL/kernels/CLDilateKernel.cpp | 2 +- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 9 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 5 +- .../CL/kernels/CLElementwiseOperationKernel.cpp | 6 +- src/core/CL/kernels/CLErodeKernel.cpp | 2 +- 
src/core/CL/kernels/CLFFTDigitReverseKernel.cpp | 6 +- src/core/CL/kernels/CLFFTRadixStageKernel.cpp | 6 +- src/core/CL/kernels/CLFFTScaleKernel.cpp | 6 +- src/core/CL/kernels/CLFastCornersKernel.cpp | 4 +- src/core/CL/kernels/CLFillBorderKernel.cpp | 6 +- src/core/CL/kernels/CLFlattenLayerKernel.cpp | 3 +- src/core/CL/kernels/CLFloorKernel.cpp | 6 +- .../CL/kernels/CLFuseBatchNormalizationKernel.cpp | 5 +- .../CLGEMMLowpMatrixMultiplyNativeKernel.cpp | 9 +- .../CLGEMMLowpMatrixMultiplyReshapedKernel.cpp | 7 +- ...GEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp | 7 +- .../kernels/CLGEMMLowpOffsetContributionKernel.cpp | 9 +- ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 7 +- ...owpQuantizeDownInt32ScaleByFixedPointKernel.cpp | 6 +- ...GEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp | 6 +- .../CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 6 +- src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 4 +- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 14 +- .../kernels/CLGEMMMatrixMultiplyNativeKernel.cpp | 9 +- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | 13 +- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 11 +- .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 7 +- .../CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp | 9 +- .../CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp | 11 +- src/core/CL/kernels/CLGatherKernel.cpp | 3 +- src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 1 + .../CL/kernels/CLGenerateProposalsLayerKernel.cpp | 7 +- src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 4 +- src/core/CL/kernels/CLHOGDetectorKernel.cpp | 4 +- src/core/CL/kernels/CLHarrisCornersKernel.cpp | 7 +- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 6 +- src/core/CL/kernels/CLHistogramKernel.cpp | 4 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 8 +- .../kernels/CLInstanceNormalizationLayerKernel.cpp | 5 +- src/core/CL/kernels/CLIntegralImageKernel.cpp | 3 +- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 5 
+- src/core/CL/kernels/CLLKTrackerKernel.cpp | 5 +- .../CLLocallyConnectedMatrixMultiplyKernel.cpp | 8 +- src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 3 +- src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp | 9 +- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 6 +- .../CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 7 +- src/core/CL/kernels/CLMedian3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLMemsetKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLayerKernel.cpp | 5 +- src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 3 +- src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 3 +- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 1 + src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 7 +- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 7 +- src/core/CL/kernels/CLPadLayerKernel.cpp | 2 + src/core/CL/kernels/CLPermuteKernel.cpp | 3 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 6 +- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 7 +- src/core/CL/kernels/CLPriorBoxLayerKernel.cpp | 5 +- .../CL/kernels/CLQLSTMLayerNormalizationKernel.cpp | 3 +- src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 6 +- src/core/CL/kernels/CLROIAlignLayerKernel.cpp | 7 +- src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 7 +- src/core/CL/kernels/CLRangeKernel.cpp | 4 +- src/core/CL/kernels/CLReductionOperationKernel.cpp | 7 +- src/core/CL/kernels/CLRemapKernel.cpp | 5 +- src/core/CL/kernels/CLReorgLayerKernel.cpp | 4 +- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 8 +- src/core/CL/kernels/CLReverseKernel.cpp | 6 +- src/core/CL/kernels/CLScaleKernel.cpp | 5 +- src/core/CL/kernels/CLScharr3x3Kernel.cpp | 4 +- src/core/CL/kernels/CLSelectKernel.cpp | 5 +- src/core/CL/kernels/CLSobel3x3Kernel.cpp | 4 +- src/core/CL/kernels/CLSobel5x5Kernel.cpp | 4 +- src/core/CL/kernels/CLSobel7x7Kernel.cpp | 4 +- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 7 +- src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp | 4 +- src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp | 4 +- 
src/core/CL/kernels/CLStackLayerKernel.cpp | 5 +- src/core/CL/kernels/CLStridedSliceKernel.cpp | 6 +- src/core/CL/kernels/CLTileKernel.cpp | 3 +- src/core/CL/kernels/CLTransposeKernel.cpp | 10 +- src/core/CL/kernels/CLUpsampleLayerKernel.cpp | 6 +- src/core/CL/kernels/CLWarpAffineKernel.cpp | 3 +- src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 3 +- src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 2 + .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 10 +- .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 11 +- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 7 +- .../CL/kernels/CLWinogradFilterTransformKernel.cpp | 6 +- .../CL/kernels/CLWinogradInputTransformKernel.cpp | 6 +- .../CL/kernels/CLWinogradOutputTransformKernel.cpp | 6 +- src/core/CL/kernels/CLYOLOLayerKernel.cpp | 8 +- src/core/CPP/ICPPSimpleKernel.cpp | 5 +- src/core/CPP/Validate.h | 117 ++++ .../CPPBoxWithNonMaximaSuppressionLimitKernel.cpp | 4 +- src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp | 12 +- ...PPDetectionWindowNonMaximaSuppressionKernel.cpp | 3 +- .../CPP/kernels/CPPNonMaximumSuppressionKernel.cpp | 4 +- src/core/CPP/kernels/CPPPermuteKernel.cpp | 7 +- src/core/CPP/kernels/CPPTopKVKernel.cpp | 10 +- src/core/CPP/kernels/CPPUpsampleKernel.cpp | 7 +- src/core/GLES_COMPUTE/IGCSimpleKernel.cpp | 3 +- .../kernels/GCAbsoluteDifferenceKernel.cpp | 1 + .../kernels/GCActivationLayerKernel.cpp | 2 + .../kernels/GCArithmeticAdditionKernel.cpp | 2 + .../kernels/GCBatchNormalizationLayerKernel.cpp | 4 +- src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp | 6 +- .../kernels/GCDepthConcatenateLayerKernel.cpp | 2 + .../GCDepthwiseConvolutionLayer3x3Kernel.cpp | 4 +- .../kernels/GCDirectConvolutionLayerKernel.cpp | 22 +- .../GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp | 2 + .../GLES_COMPUTE/kernels/GCFillBorderKernel.cpp | 2 + .../kernels/GCGEMMInterleave4x4Kernel.cpp | 2 + .../kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp | 4 +- .../kernels/GCGEMMMatrixAdditionKernel.cpp | 2 + 
.../kernels/GCGEMMMatrixMultiplyKernel.cpp | 6 +- .../kernels/GCGEMMTranspose1xWKernel.cpp | 4 +- src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp | 12 +- .../kernels/GCNormalizationLayerKernel.cpp | 2 + .../kernels/GCNormalizePlanarYUVLayerKernel.cpp | 4 +- .../kernels/GCPixelWiseMultiplicationKernel.cpp | 2 + .../GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp | 4 +- src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp | 4 +- .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp | 4 +- .../GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp | 4 +- .../GLES_COMPUTE/kernels/GCTransposeKernel.cpp | 4 +- .../kernels/GCWeightsReshapeKernel.cpp | 2 + src/core/Helpers.cpp | 171 ----- src/core/NEON/NETracePoint.cpp | 4 +- .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp | 2 + src/core/NEON/kernels/NEAccumulateKernel.cpp | 4 +- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 4 +- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 4 +- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 4 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 2 + .../kernels/NEBatchNormalizationLayerKernel.cpp | 4 +- .../NEON/kernels/NEBatchToSpaceLayerKernel.cpp | 2 + src/core/NEON/kernels/NEBitwiseAndKernel.cpp | 2 + src/core/NEON/kernels/NEBitwiseNotKernel.cpp | 4 +- src/core/NEON/kernels/NEBitwiseOrKernel.cpp | 4 +- src/core/NEON/kernels/NEBitwiseXorKernel.cpp | 4 +- .../NEON/kernels/NEBoundingBoxTransformKernel.cpp | 6 +- src/core/NEON/kernels/NEBox3x3Kernel.cpp | 5 +- src/core/NEON/kernels/NECannyEdgeKernel.cpp | 8 +- src/core/NEON/kernels/NEChannelCombineKernel.cpp | 4 +- src/core/NEON/kernels/NEChannelExtractKernel.cpp | 4 +- .../NEON/kernels/NEChannelShuffleLayerKernel.cpp | 6 +- src/core/NEON/kernels/NECol2ImKernel.cpp | 4 +- src/core/NEON/kernels/NEColorConvertKernel.cpp | 2 + .../NEConvertFullyConnectedWeightsKernel.cpp | 4 +- .../kernels/NEConvertQuantizedSignednessKernel.cpp | 2 + src/core/NEON/kernels/NEConvolutionKernel.cpp | 4 +- src/core/NEON/kernels/NECopyKernel.cpp | 4 +- 
src/core/NEON/kernels/NECropKernel.cpp | 9 +- .../kernels/NECumulativeDistributionKernel.cpp | 4 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 2 + .../NEON/kernels/NEDepthConvertLayerKernel.cpp | 7 +- .../NEON/kernels/NEDepthToSpaceLayerKernel.cpp | 3 + .../NEDepthwiseConvolutionLayerNativeKernel.cpp | 7 +- .../NEON/kernels/NEDequantizationLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEDerivativeKernel.cpp | 4 +- src/core/NEON/kernels/NEDilateKernel.cpp | 2 + .../kernels/NEDirectConvolutionLayerKernel.cpp | 6 +- .../NEDirectConvolutionLayerOutputStageKernel.cpp | 6 +- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 4 +- src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp | 4 +- src/core/NEON/kernels/NEErodeKernel.cpp | 2 + src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp | 4 +- src/core/NEON/kernels/NEFFTRadixStageKernel.cpp | 7 +- src/core/NEON/kernels/NEFFTScaleKernel.cpp | 2 + src/core/NEON/kernels/NEFastCornersKernel.cpp | 4 +- src/core/NEON/kernels/NEFillArrayKernel.cpp | 1 + src/core/NEON/kernels/NEFillBorderKernel.cpp | 3 +- src/core/NEON/kernels/NEFlattenLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEFloorKernel.cpp | 4 +- .../kernels/NEFuseBatchNormalizationKernel.cpp | 4 +- .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 2 + .../kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 6 +- .../kernels/NEGEMMLowpOffsetContributionKernel.cpp | 4 +- ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 4 +- .../NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 4 +- ...tizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 4 +- ...ntizeDownInt32ToInt8ScaleByFixedPointKernel.cpp | 4 +- ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 4 +- .../NEON/kernels/NEGEMMLowpReductionKernel.cpp | 4 +- .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 4 +- .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 7 +- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 4 +- src/core/NEON/kernels/NEGatherKernel.cpp | 6 +- src/core/NEON/kernels/NEGaussian3x3Kernel.cpp | 4 +- 
src/core/NEON/kernels/NEGaussian5x5Kernel.cpp | 4 +- src/core/NEON/kernels/NEGaussianPyramidKernel.cpp | 4 +- .../kernels/NEGenerateProposalsLayerKernel.cpp | 8 +- src/core/NEON/kernels/NEHOGDescriptorKernel.cpp | 4 +- src/core/NEON/kernels/NEHOGDetectorKernel.cpp | 4 +- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 4 +- .../kernels/NEHeightConcatenateLayerKernel.cpp | 2 + src/core/NEON/kernels/NEHistogramKernel.cpp | 4 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 4 +- .../kernels/NEInstanceNormalizationLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEIntegralImageKernel.cpp | 4 +- src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 2 + src/core/NEON/kernels/NELKTrackerKernel.cpp | 6 +- .../NELocallyConnectedMatrixMultiplyKernel.cpp | 4 +- src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 4 +- .../NEON/kernels/NEMaxUnpoolingLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 4 +- .../kernels/NEMeanStdDevNormalizationKernel.cpp | 4 +- src/core/NEON/kernels/NEMedian3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEMemsetKernel.cpp | 6 +- src/core/NEON/kernels/NEMinMaxLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEMinMaxLocationKernel.cpp | 4 +- src/core/NEON/kernels/NENonLinearFilterKernel.cpp | 4 +- .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 4 +- .../NEON/kernels/NENormalizationLayerKernel.cpp | 7 +- src/core/NEON/kernels/NEPadLayerKernel.cpp | 2 + src/core/NEON/kernels/NEPermuteKernel.cpp | 4 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 4 +- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp | 2 + .../kernels/NEQLSTMLayerNormalizationKernel.cpp | 4 +- .../NEON/kernels/NEQuantizationLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEROIAlignLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp | 4 +- src/core/NEON/kernels/NERangeKernel.cpp | 2 + .../NEON/kernels/NEReductionOperationKernel.cpp | 6 +- src/core/NEON/kernels/NERemapKernel.cpp | 9 +- 
src/core/NEON/kernels/NEReorgLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEReverseKernel.cpp | 2 + src/core/NEON/kernels/NEScaleKernel.cpp | 12 +- src/core/NEON/kernels/NEScharr3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NESelectKernel.cpp | 4 +- src/core/NEON/kernels/NESobel3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NESobel5x5Kernel.cpp | 4 +- src/core/NEON/kernels/NESobel7x7Kernel.cpp | 4 +- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 8 +- .../NEON/kernels/NESpaceToBatchLayerKernel.cpp | 3 + .../NEON/kernels/NESpaceToDepthLayerKernel.cpp | 3 + src/core/NEON/kernels/NEStackLayerKernel.cpp | 2 + src/core/NEON/kernels/NEStridedSliceKernel.cpp | 9 +- src/core/NEON/kernels/NEThresholdKernel.cpp | 2 + src/core/NEON/kernels/NETileKernel.cpp | 4 +- src/core/NEON/kernels/NETransposeKernel.cpp | 6 +- src/core/NEON/kernels/NEUpsampleLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEWarpKernel.cpp | 19 +- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 2 + .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 2 + .../kernels/NEWinogradConvolutionLayerKernel.cpp | 6 +- .../kernels/NEWinogradConvolutionLayerKernel.h | 4 +- src/core/NEON/kernels/NEYOLOLayerKernel.cpp | 4 +- .../NEON/kernels/assembly/INEGEMMWrapperKernel.cpp | 4 +- .../NEON/kernels/assembly/INEGEMMWrapperKernel.h | 108 +++ .../NEDepthwiseConvolutionAssemblyKernelWrapper.h | 88 +++ src/core/NEON/kernels/assembly/arm_gemm_local.hpp | 34 + .../NEON/kernels/convolution/common/activation.hpp | 37 + src/core/NEON/kernels/convolution/common/alloc.hpp | 31 + src/core/NEON/kernels/convolution/common/arm.hpp | 39 ++ .../kernels/convolution/common/convolution.hpp | 29 + .../NEON/kernels/convolution/common/padding.hpp | 91 +++ src/core/NEON/kernels/convolution/common/perf.h | 32 + .../NEON/kernels/convolution/common/qasymm8.hpp | 54 ++ .../NEON/kernels/convolution/common/qsymm8.hpp | 76 +++ src/core/NEON/kernels/convolution/common/shims.hpp | 749 
+++++++++++++++++++++ .../NEON/kernels/convolution/common/tensor.hpp | 178 +++++ .../kernels/convolution/common/tensor_utils.hpp | 46 ++ src/core/NEON/kernels/convolution/common/utils.hpp | 60 ++ .../kernels/convolution/depthwise/depthwise.hpp | 551 +++++++++++++++ .../convolution/depthwise/depthwise_dilated.hpp | 156 +++++ .../convolution/depthwise/depthwise_quantized.hpp | 291 ++++++++ .../depthwise/depthwise_quantized_dilated.hpp | 88 +++ .../kernels/detail/NEDirectConvolutionDetail.h | 4 +- src/core/TensorInfo.cpp | 1 + src/core/helpers/AutoConfiguration.h | 176 +++++ src/core/helpers/NormalizationHelpers.h | 47 ++ src/core/helpers/ScaleHelpers.h | 331 +++++++++ src/core/helpers/SoftmaxHelpers.cpp | 45 ++ src/core/helpers/SoftmaxHelpers.h | 50 ++ src/core/helpers/Utils.h | 97 +++ src/core/helpers/WindowHelpers.cpp | 183 +++++ src/core/helpers/WindowHelpers.h | 172 +++++ src/core/utils/helpers/bit_ops.h | 52 ++ src/core/utils/helpers/fft.cpp | 4 +- src/core/utils/helpers/fft.h | 55 ++ src/core/utils/helpers/float_ops.h | 116 ++++ src/core/utils/helpers/tensor_info.h | 57 ++ src/core/utils/helpers/tensor_transform.cpp | 4 +- src/graph/algorithms/TopologicalSort.cpp | 6 +- src/graph/backends/CL/CLFunctionsFactory.cpp | 2 +- src/graph/backends/CL/CLNodeValidator.cpp | 2 +- src/graph/backends/CL/CLSubTensorHandle.cpp | 4 +- src/graph/backends/GLES/GCFunctionsFactory.cpp | 2 +- src/graph/backends/GLES/GCNodeValidator.cpp | 4 +- src/graph/backends/NEON/NEFunctionFactory.cpp | 2 +- src/graph/backends/NEON/NENodeValidator.cpp | 2 +- src/graph/backends/NEON/NETensorHandle.cpp | 4 +- .../detail/CrossLayerMemoryManagerHelpers.cpp | 2 +- src/graph/mutators/DepthConcatSubTensorMutator.cpp | 6 +- src/graph/mutators/GroupedConvolutionMutator.cpp | 2 +- src/graph/mutators/NodeExecutionMethodMutator.cpp | 4 +- src/graph/mutators/NodeFusionMutator.cpp | 13 +- src/graph/mutators/SplitLayerSubTensorMutator.cpp | 4 +- src/graph/mutators/SyntheticDataTypeMutator.cpp | 2 +- 
src/runtime/CL/CLHelpers.cpp | 3 +- src/runtime/CL/CLMemory.cpp | 4 +- src/runtime/CL/CLRuntimeContext.cpp | 2 + src/runtime/CL/CLTensorAllocator.cpp | 4 +- src/runtime/CL/functions/CLArgMinMaxLayer.cpp | 11 +- src/runtime/CL/functions/CLConcatenateLayer.cpp | 1 + .../functions/CLConvertFullyConnectedWeights.cpp | 2 + src/runtime/CL/functions/CLConvolutionLayer.cpp | 2 + src/runtime/CL/functions/CLCropResize.cpp | 4 + src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 2 + .../CL/functions/CLDirectConvolutionLayer.cpp | 2 +- .../CL/functions/CLDirectDeconvolutionLayer.cpp | 1 + src/runtime/CL/functions/CLFFT1D.cpp | 2 +- src/runtime/CL/functions/CLFFTConvolutionLayer.cpp | 5 +- src/runtime/CL/functions/CLFill.cpp | 2 + src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 +- src/runtime/CL/functions/CLGEMM.cpp | 15 +- .../CL/functions/CLGEMMConvolutionLayer.cpp | 3 +- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 7 +- .../CL/functions/CLGenerateProposalsLayer.cpp | 1 + .../CL/functions/CLInstanceNormalizationLayer.cpp | 2 + src/runtime/CL/functions/CLLSTMLayerQuantized.cpp | 1 + src/runtime/CL/functions/CLPriorBoxLayer.cpp | 2 + src/runtime/CL/functions/CLQLSTMLayer.cpp | 1 + src/runtime/CL/functions/CLReduceMean.cpp | 3 +- src/runtime/CL/functions/CLReductionOperation.cpp | 8 +- src/runtime/CL/functions/CLRemap.cpp | 2 +- src/runtime/CL/functions/CLSelect.cpp | 2 + src/runtime/CL/functions/CLSoftmaxLayer.cpp | 7 +- src/runtime/CL/functions/CLSplit.cpp | 1 + .../CL/functions/CLWinogradConvolutionLayer.cpp | 2 +- src/runtime/CL/gemm/CLGEMMKernelSelection.h | 65 ++ .../CL/gemm/CLGEMMKernelSelectionBifrost.cpp | 4 +- src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h | 55 ++ .../CL/gemm/CLGEMMKernelSelectionMidgard.cpp | 4 +- src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h | 53 ++ .../CL/gemm/CLGEMMKernelSelectionValhall.cpp | 4 +- src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h | 53 ++ src/runtime/CL/tuners/BifrostTuner.cpp | 2 +- 
src/runtime/CL/tuners/MidgardTuner.cpp | 2 +- src/runtime/CPP/CPPScheduler.cpp | 3 +- .../CPP/functions/CPPDetectionOutputLayer.cpp | 1 + .../CPP/functions/CPPDetectionPostProcessLayer.cpp | 1 + src/runtime/CPUUtils.cpp | 9 +- src/runtime/CPUUtils.h | 51 ++ src/runtime/DeviceProperties.cpp | 6 +- src/runtime/GLES_COMPUTE/GCMemory.cpp | 4 +- .../GLES_COMPUTE/functions/GCConcatenateLayer.cpp | 2 + src/runtime/IScheduler.cpp | 10 +- src/runtime/NEON/INESimpleFunctionNoBorder.cpp | 6 +- src/runtime/NEON/functions/NEArgMinMaxLayer.cpp | 4 +- .../NEON/functions/NEBatchNormalizationLayer.cpp | 4 +- src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp | 4 +- src/runtime/NEON/functions/NEConcatenateLayer.cpp | 1 + src/runtime/NEON/functions/NECropResize.cpp | 2 + .../NEON/functions/NEDeconvolutionLayer.cpp | 1 + src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp | 4 +- src/runtime/NEON/functions/NEFFT1D.cpp | 4 +- .../NEON/functions/NEFFTConvolutionLayer.cpp | 7 +- .../NEON/functions/NEFullyConnectedLayer.cpp | 2 + src/runtime/NEON/functions/NEGEMM.cpp | 3 +- .../NEON/functions/NEGEMMAssemblyDispatch.cpp | 10 +- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 1 + .../NEON/functions/NEGenerateProposalsLayer.cpp | 1 + .../NEON/functions/NELSTMLayerQuantized.cpp | 3 +- src/runtime/NEON/functions/NEPadLayer.cpp | 1 + src/runtime/NEON/functions/NEPriorBoxLayer.cpp | 4 +- src/runtime/NEON/functions/NEQLSTMLayer.cpp | 1 + src/runtime/NEON/functions/NEReduceMean.cpp | 3 +- .../NEON/functions/NEReductionOperation.cpp | 1 + src/runtime/NEON/functions/NEScale.cpp | 4 +- .../NEON/functions/NESimpleAssemblyFunction.cpp | 4 +- .../NEON/functions/NESimpleAssemblyFunction.h | 56 ++ src/runtime/NEON/functions/NESoftmaxLayer.cpp | 7 +- .../NEON/functions/NEWinogradConvolutionLayer.cpp | 4 +- .../NEDepthwiseConvolutionAssemblyDispatch.cpp | 11 +- src/runtime/OMP/OMPScheduler.cpp | 2 +- src/runtime/SchedulerUtils.cpp | 4 + src/runtime/SchedulerUtils.h | 45 ++ src/runtime/Utils.cpp | 7 +- 
src/runtime/Utils.h | 60 ++ 450 files changed, 6798 insertions(+), 910 deletions(-) create mode 100644 src/core/AccessWindowAutoPadding.h create mode 100644 src/core/AccessWindowStatic.h create mode 100644 src/core/AccessWindowTranspose.h create mode 100644 src/core/CL/CLValidate.h create mode 100644 src/core/CL/ICLGEMMKernelConfiguration.h create mode 100644 src/core/CL/gemm/CLGEMMHelpers.h create mode 100644 src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h create mode 100644 src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h create mode 100644 src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h create mode 100644 src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h create mode 100644 src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h create mode 100644 src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h create mode 100644 src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h create mode 100644 src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h create mode 100644 src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h create mode 100644 src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h create mode 100644 src/core/CPP/Validate.h create mode 100644 src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h create mode 100644 src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h create mode 100644 src/core/NEON/kernels/assembly/arm_gemm_local.hpp create mode 100644 src/core/NEON/kernels/convolution/common/activation.hpp create mode 100644 src/core/NEON/kernels/convolution/common/alloc.hpp create mode 100644 src/core/NEON/kernels/convolution/common/arm.hpp create mode 100644 src/core/NEON/kernels/convolution/common/convolution.hpp create mode 100644 src/core/NEON/kernels/convolution/common/padding.hpp create mode 100644 src/core/NEON/kernels/convolution/common/perf.h 
create mode 100644 src/core/NEON/kernels/convolution/common/qasymm8.hpp create mode 100644 src/core/NEON/kernels/convolution/common/qsymm8.hpp create mode 100644 src/core/NEON/kernels/convolution/common/shims.hpp create mode 100644 src/core/NEON/kernels/convolution/common/tensor.hpp create mode 100644 src/core/NEON/kernels/convolution/common/tensor_utils.hpp create mode 100644 src/core/NEON/kernels/convolution/common/utils.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp create mode 100644 src/core/helpers/AutoConfiguration.h create mode 100644 src/core/helpers/NormalizationHelpers.h create mode 100644 src/core/helpers/ScaleHelpers.h create mode 100644 src/core/helpers/SoftmaxHelpers.cpp create mode 100644 src/core/helpers/SoftmaxHelpers.h create mode 100644 src/core/helpers/Utils.h create mode 100644 src/core/helpers/WindowHelpers.cpp create mode 100644 src/core/helpers/WindowHelpers.h create mode 100644 src/core/utils/helpers/bit_ops.h create mode 100644 src/core/utils/helpers/fft.h create mode 100644 src/core/utils/helpers/float_ops.h create mode 100644 src/core/utils/helpers/tensor_info.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelection.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h create mode 100644 src/runtime/CPUUtils.h create mode 100644 src/runtime/NEON/functions/NESimpleAssemblyFunction.h create mode 100644 src/runtime/SchedulerUtils.h create mode 100644 src/runtime/Utils.h (limited to 'src') diff --git a/src/core/AccessWindowAutoPadding.cpp b/src/core/AccessWindowAutoPadding.cpp index 
85c5b27d82..ca2f7d238f 100644 --- a/src/core/AccessWindowAutoPadding.cpp +++ b/src/core/AccessWindowAutoPadding.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/AccessWindowAutoPadding.h" +#include "src/core/AccessWindowAutoPadding.h" #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Window.h" diff --git a/src/core/AccessWindowAutoPadding.h b/src/core/AccessWindowAutoPadding.h new file mode 100644 index 0000000000..b8d1508679 --- /dev/null +++ b/src/core/AccessWindowAutoPadding.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H +#define ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class Window; +class ITensorInfo; + +/** Dummy access window. + * + * This implementation always uses the auto padding of the tensor info and + * never updates the window. The valid region is always set to cover the entire + * tensor. + * + * @note This access window is only used during the migration to the new + * padding system. It will be removed once all kernels have been ported. + * + * */ +class AccessWindowAutoPadding : public IAccessWindow +{ +public: + /** Constructor. + * + * @param[in,out] info Tensor info of the accessed tensor. + */ + AccessWindowAutoPadding(ITensorInfo *info); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ + AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete; + /** Allow instances of this class to be move constructed */ + AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default; + /** Allow instances of this class to be move assigned */ + AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default; + /** Default destructor */ + ~AccessWindowAutoPadding() = default; + + /** Set the valid region to match the entire tensor. */ + void set_valid_region(); + + /** Return a valid region that spans across the entire tensor. + * + * @return a valid region covering the entire tensor. 
+ * + */ + ValidRegion compute_valid_region() const; + + // Inherited methods overridden: + bool update_window_if_needed(Window &window) const override; + bool update_padding_if_needed(const Window &window) override; + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; + +private: + ITensorInfo *_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H*/ diff --git a/src/core/AccessWindowStatic.cpp b/src/core/AccessWindowStatic.cpp index 10e88b8632..0607011bc5 100644 --- a/src/core/AccessWindowStatic.cpp +++ b/src/core/AccessWindowStatic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/AccessWindowStatic.h" +#include "src/core/AccessWindowStatic.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorInfo.h" diff --git a/src/core/AccessWindowStatic.h b/src/core/AccessWindowStatic.h new file mode 100644 index 0000000000..f7d43cbb55 --- /dev/null +++ b/src/core/AccessWindowStatic.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_IACCESS_WINDOW_STATIC_H +#define ARM_COMPUTE_IACCESS_WINDOW_STATIC_H + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class Window; +class ITensorInfo; + +/** Implementation of a static rectangular access pattern. + * + * In this implementation the access offsets and sizes are not relative to the + * current element. Instead they are considered to be absolute coordinates + * within the accessed tensor's shape. + * + * */ +class AccessWindowStatic : public IAccessWindow +{ +public: + /** Constructor for a static access pattern. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] start_x Start of the access in X direction. + * @param[in] start_y Start of the access in Y direction. + * @param[in] end_x End of the access in X direction. + * @param[in] end_y End of the access in Y direction. 
+ */ + AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + AccessWindowStatic(const AccessWindowStatic &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + AccessWindowStatic &operator=(const AccessWindowStatic &) = delete; + /** Allow instances of this class to be move constructed */ + AccessWindowStatic(AccessWindowStatic &&) = default; + /** Allow instances of this class to be moved */ + AccessWindowStatic &operator=(AccessWindowStatic &&) = default; + /** Default destructor */ + ~AccessWindowStatic() = default; + + /** Set the valid region based on the static access pattern and valid + * region of the inputs. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + */ + void set_valid_region(const Window &window, const ValidRegion &input_valid_region); + + /** Compute the valid region based on the static access pattern and valid region of the inputs. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + * + * @return a valid region. 
+ * + */ + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const; + + // Inherited methods overriden: + bool update_window_if_needed(Window &window) const override; + bool update_padding_if_needed(const Window &window) override; + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; + +private: + ITensorInfo *_info; + int _start_x; + int _start_y; + int _end_x; + int _end_y; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_IACCESS_WINDOW_STATIC_H*/ diff --git a/src/core/AccessWindowTranspose.cpp b/src/core/AccessWindowTranspose.cpp index 4c03ca16c7..d8bd4c4de1 100644 --- a/src/core/AccessWindowTranspose.cpp +++ b/src/core/AccessWindowTranspose.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/AccessWindowTranspose.h" +#include "src/core/AccessWindowTranspose.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorInfo.h" diff --git a/src/core/AccessWindowTranspose.h b/src/core/AccessWindowTranspose.h new file mode 100644 index 0000000000..0306076d6e --- /dev/null +++ b/src/core/AccessWindowTranspose.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H +#define ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class Window; +class ITensorInfo; + +/** Implementation of a XY-transpose access pattern. 
*/ +class AccessWindowTranspose : public AccessWindowRectangle +{ +public: + using AccessWindowRectangle::AccessWindowRectangle; + bool update_window_if_needed(Window &window) const override; + bool update_padding_if_needed(const Window &window) override; + using AccessWindowRectangle::compute_valid_region; + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H*/ diff --git a/src/core/CL/CLValidate.h b/src/core/CL/CLValidate.h new file mode 100644 index 0000000000..cbbdf2d9d2 --- /dev/null +++ b/src/core/CL/CLValidate.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CL_VALIDATE_H +#define ARM_COMPUTE_CL_VALIDATE_H + +#include "arm_compute/core/Validate.h" + +namespace arm_compute +{ +#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported())) + +#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported())) + +/** Return an error if int64_base_atomics extension is not supported by the device. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * + * @return Status + */ +inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line) +{ + if(!CLKernelLibrary::get().int64_base_atomics_supported()) + { + return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported"); + } + return arm_compute::Status{}; +} + +#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \ + ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__)); + +#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \ + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__)); + +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_VALIDATE_H */ diff --git a/src/core/CL/ICLGEMMKernelConfiguration.h b/src/core/CL/ICLGEMMKernelConfiguration.h new file mode 100644 index 0000000000..ac0e7ab7ff --- /dev/null +++ b/src/core/CL/ICLGEMMKernelConfiguration.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H +#define ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H + +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** Basic interface for the GEMM kernel configuration */ +class ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] arch GPU target + */ + ICLGEMMKernelConfiguration(GPUTarget arch) + : _target(arch) + { + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete; + /** Default Move Constructor. 
*/ + ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default; + /** Default move assignment operator */ + ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default; + /** Virtual destructor */ + virtual ~ICLGEMMKernelConfiguration() = default; + /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used + * + * @param[in] m Number of rows LHS matrix + * @param[in] n Number of columns RHS matrix + * @param[in] k Number of columns LHS matrix or number of rows RHS matrix + * @param[in] b Batch size + * @param[in] data_type Data type + */ + virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0; + +protected: + GPUTarget _target; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H */ diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp index be633746a2..f91510b4a7 100644 --- a/src/core/CL/ICLKernel.cpp +++ b/src/core/CL/ICLKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,14 +23,9 @@ */ #include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/Utils.h" #include diff --git a/src/core/CL/ICLSimple2DKernel.cpp b/src/core/CL/ICLSimple2DKernel.cpp index ce95495fff..dfef5822b2 100644 --- a/src/core/CL/ICLSimple2DKernel.cpp +++ b/src/core/CL/ICLSimple2DKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -23,10 +23,7 @@ */ #include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CL/ICLSimpleKernel.cpp b/src/core/CL/ICLSimpleKernel.cpp index d2f09a3478..90b5be8069 100644 --- a/src/core/CL/ICLSimpleKernel.cpp +++ b/src/core/CL/ICLSimpleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CL/gemm/CLGEMMHelpers.cpp b/src/core/CL/gemm/CLGEMMHelpers.cpp index 0a4a4adc31..877bf1e047 100644 --- a/src/core/CL/gemm/CLGEMMHelpers.cpp +++ b/src/core/CL/gemm/CLGEMMHelpers.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/gemm/CLGEMMHelpers.h b/src/core/CL/gemm/CLGEMMHelpers.h new file mode 100644 index 0000000000..013c068cf7 --- /dev/null +++ b/src/core/CL/gemm/CLGEMMHelpers.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMHELPERS_H +#define ARM_COMPUTE_CLGEMMHELPERS_H + +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensorInfo; +struct GEMMRHSMatrixInfo; + +namespace cl_gemm +{ +/** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo + * + * @param[in] m Number of rows (M) in the LHS matrix not reshaped + * @param[in] n Number of columns (N) in the RHS matrix not reshaped + * @param[in] m0 Number of rows processed by each thread/work-item + * @param[in] n0 Number of columns processed by each thread/work-item + * @param[in] k0 Number of inner accumulation performed by each thread/work-item + * @param[in] v0 Number of vertical blocks of size (m0xk0) stored on the same output row + * @param[in] h0 Number of horizontal blocks of size (k0xn0) stored on the same output row + * @param[in] lhs_interleave True if the v0 (m0xk0) blocks have to be interleaved in the output row + * @param[in] rhs_interleave True if the h0 (k0xn0) blocks have to be interleaved in the output row + * @param[in] lhs_transpose True if the (m0xk0) block has to be transposed before been stored + * @param[in] rhs_transpose True if the (k0xn0) block has to be transposed before been stored + * @param[in] export_to_cl_image (Optional) True if the RHS reshaped matrix has to be exported to cl_image + * + * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo + */ +std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, + bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image = false); + +/** Update padding required to export the OpenCL buffer to OpenCL image2d + * + * @param[in,out] tensor ITensorInfo of the tensor required to be exported to OpenCL image2d + */ +void update_padding_for_cl_image(ITensorInfo *tensor); + +/** Utility function to validate the image2d OpenCL 
object support on the RHS reshaped matrix + * + * @param[in] tensor_reshaped_info TensorInfo for the RHS reshaped matrix + * @param[in] rhs_info @ref GEMMRHSMatrixInfo + * + * @return Status reporting if we can use the image2d OpenCL object on the RHS reshaped matrix + */ +Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info); +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMHELPERS_H */ diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h new file mode 100644 index 0000000000..aecf5a8aa8 --- /dev/null +++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H +#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h" + +#include "support/MemorySupport.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** CLGEMMNative factory class */ +class CLGEMMNativeKernelConfigurationFactory final +{ +public: + /** Static method to construct CLGEMMNative kernel object accordingly with the GPU target + * + * @param[in] gpu GPU target + * + * @return CLGEMMNative kernel configuration class + */ + static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu) + { + switch(get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationMidgard>(gpu); + case GPUTarget::BIFROST: + return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(gpu); + case GPUTarget::VALHALL: + return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationValhall>(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H */ diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp index 51b7fc7190..4cc3d6ae74 100644 --- a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp +++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h new file mode 100644 index 0000000000..1e7432c89a --- /dev/null +++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H +#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Bifrost based OpenCL GEMMNative configuration */ +class CLGEMMNativeKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMNativeKernelConfigurationBifrost(GPUTarget gpu); + + // Inherited overridden method + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; + +private: + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H */ diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp index 3e7c17664a..fd699a08f7 100644 --- a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp +++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h new file mode 100644 index 0000000000..2f6671706e --- /dev/null +++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H +#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Midgard based OpenCL GEMMNative configuration */ +class CLGEMMNativeKernelConfigurationMidgard final : public ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMNativeKernelConfigurationMidgard(GPUTarget gpu); + + // Inherited overridden method + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; + +private: + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H */ diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp index efc82fb78c..2c82340eef 100644 --- a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp +++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h new file mode 100644 index 0000000000..fb51b02edf --- /dev/null +++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H +#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Valhall based OpenCL GEMMNative configuration */ +class CLGEMMNativeKernelConfigurationValhall final : public ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMNativeKernelConfigurationValhall(GPUTarget gpu); + + // Inherited overridden method + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; + +private: + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H */ diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h new file mode 100644 index 0000000000..21ccf2d647 --- /dev/null +++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H +#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h" +#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h" + +#include "support/MemorySupport.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** CLGEMMReshaped factory class */ +class CLGEMMReshapedKernelConfigurationFactory final +{ +public: + /** Static method to call the CLGEMMReshaped kernel configuration class accordingly with the GPU target + * + * @param[in] gpu GPU target + * + * @return CLGEMMReshaped kernel configuration class + */ + static std::unique_ptr create(GPUTarget gpu) + { + switch(get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + case GPUTarget::BIFROST: + return support::cpp14::make_unique(gpu); + case GPUTarget::VALHALL: + return support::cpp14::make_unique(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H */ diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp index b5fc074fb4..00c284facc 100644 --- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp +++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp @@ -21,15 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h" +#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include @@ -216,17 +216,17 @@ std::pair CLGEMMReshapedKernelConfiguratio { if(workload <= 790.39f) { - return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false); + return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false); } else { if(workload <= 982.39f) { - return configure_lhs_rhs_info(m,n,4,2,4,4,4,false,false,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, false, true, false, false); } else { - return configure_lhs_rhs_info(m,n,2,4,4,2,1,false,true,true,false,false); + return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 1, false, true, true, false, false); } } } @@ -236,16 +236,16 @@ std::pair CLGEMMReshapedKernelConfiguratio { if(r_mn <= 0.11f) { - return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false); + return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false); } else { - return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false); } } else { - return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false); + return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false); } } } @@ -257,22 +257,22 @@ std::pair CLGEMMReshapedKernelConfiguratio { if(m <= 64.5) { - return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false); + return configure_lhs_rhs_info(m, n, 
4, 4, 4, 2, 4, true, false, true, false, false); } else { - return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false); } } else { if(r_mn <= 1.09f) { - return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false); } else { - return configure_lhs_rhs_info(m,n,4,4,4,2,2,true,true,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, true, true, true, false, false); } } } @@ -280,17 +280,17 @@ std::pair CLGEMMReshapedKernelConfiguratio { if(m <= 43) { - return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false); } else { if(workload <= 26364.79f) { - return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false); } else { - return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false); + return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false); } } } diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h new file mode 100644 index 0000000000..e3b62ced6a --- /dev/null +++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H +#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Bifrost based OpenCL GEMMReshaped configuration */ +class CLGEMMReshapedKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu); + + // Inherited overridden method + std::pair configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; + +private: + std::pair configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H */ diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp index 0c09f5084a..519e903a5a 100644 --- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp +++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h" +#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h new file mode 100644 index 0000000000..5f7e701e0e --- /dev/null +++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H +#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Valhall based OpenCL GEMMReshaped configuration */ +class CLGEMMReshapedKernelConfigurationValhall final : public ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu); + + // Inherited overridden method + std::pair configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; + +private: + std::pair configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H */ diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h new file mode 100644 index 0000000000..4efe28ce69 --- /dev/null +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H +#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h" + +#include "support/MemorySupport.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** CLGEMMReshapedOnlyRHS factory class */ +class CLGEMMReshapedOnlyRHSKernelConfigurationFactory final +{ +public: + /** Static method to call the CLGEMMReshapedOnlyRHS kernel configuration class accordingly with the GPU target + * + * @param[in] gpu GPU target + * + * @return CLGEMMReshapedOnlyRHS kernel configuration class + */ + static std::unique_ptr create(GPUTarget gpu) + { + switch(get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + case GPUTarget::BIFROST: + return support::cpp14::make_unique(gpu); + case GPUTarget::VALHALL: + return support::cpp14::make_unique(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H */ diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp index 16eabf069c..0a0fc5d152 100644 --- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp @@ -21,15 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include @@ -122,13 +122,13 @@ std::pair CLGEMMReshapedOnlyRHSKernelConfi if(m == 1) { - if ( n <= 2548 ) + if(n <= 2548) { - return configure_lhs_rhs_info(m,n,1,2,16,1,4,false,true,false,true,false); + return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false); } else { - return configure_lhs_rhs_info(m,n,1,4,16,1,8,false,true,false,true,false); + return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 8, false, true, false, true, false); } } else @@ -251,7 +251,7 @@ std::pair CLGEMMReshapedOnlyRHSKernelConfi if(m == 1) { - return configure_lhs_rhs_info(m,n,1,2,16,1,32,false,true,false,true,false); + return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false); } else { diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h new file mode 100644 index 0000000000..618dbd9923 --- /dev/null +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H +#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Bifrost based OpenCL GEMMReshapedOnlyRHS configuration */ +class CLGEMMReshapedOnlyRHSKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu); + + // Inherited overridden method + std::pair configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; + +private: + std::pair configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H */ diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp index 23bf02c61a..f7939d29c0 100644 --- 
a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp @@ -21,15 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h new file mode 100644 index 0000000000..b9289923b9 --- /dev/null +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H +#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H + +#include "src/core/CL/ICLGEMMKernelConfiguration.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Valhall based OpenCL GEMMReshapedOnlyRHS configuration */ +class CLGEMMReshapedOnlyRHSKernelConfigurationValhall final : public ICLGEMMKernelConfiguration +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu); + + // Inherited overridden method + std::pair configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; + +private: + std::pair configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); + std::pair configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H */ diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp index 9deb16524e..29745beee7 100644 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp @@ -27,12 +27,9 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" 
#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 5a9434ee5a..f0e3047796 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -25,14 +25,14 @@ #include "arm_compute/core/CL/CLCoreRuntimeContext.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/helpers/float_ops.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" + #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index b78ac27cfa..b5a801a97f 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -23,16 +23,17 @@ */ #include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include 
"arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp index dd9c234c56..7a8c9ad0fb 100644 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp @@ -25,12 +25,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 1c1df6c4eb..09b668d6cd 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -25,12 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp index c74f7e055b..6eb22c9ada 100644 --- 
a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp @@ -24,9 +24,11 @@ #include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp index 44378c8239..53a438dcf6 100644 --- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp @@ -27,9 +27,8 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp index 77c48e6e82..0e2e5d4f3c 100644 --- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp @@ -27,9 +27,8 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp index a15305e3b7..65b17c02bd 100644 --- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp @@ -27,9 +27,8 @@ #include 
"arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index 95ea3d7df5..b8c0d2f2b8 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp index 7916dce241..2f6c09df0b 100644 --- a/src/core/CL/kernels/CLBox3x3Kernel.cpp +++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp @@ -26,9 +26,8 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp 
b/src/core/CL/kernels/CLCannyEdgeKernel.cpp index b8a53650e8..c76ec6769e 100644 --- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp +++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp @@ -29,6 +29,7 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp index b0e5111417..d574f352ae 100644 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp @@ -27,14 +27,12 @@ #include "arm_compute/core/CL/ICLMultiImage.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/MultiImageInfo.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp index 13ae8f5ef4..7911b948ae 100644 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp @@ -28,14 +28,13 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/MultiImageInfo.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" 
#include #include diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp index ad000ba17f..301a762850 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp @@ -25,12 +25,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index 4050b24e0c..3dc007d9e0 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -25,13 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index e14b871ae6..0f82d87348 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -27,14 +27,12 @@ #include "arm_compute/core/CL/ICLMultiImage.h" #include "arm_compute/core/CL/ICLTensor.h" #include 
"arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/MultiImageInfo.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index 5bb1d56690..2b72946f49 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -24,8 +24,10 @@ #include "arm_compute/core/CL/kernels/CLComparisonKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index 7c6114640c..b4e42bf3bc 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -25,10 +25,11 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp index ca07e68345..48b185f78d 100644 --- 
a/src/core/CL/kernels/CLConvolutionKernel.cpp +++ b/src/core/CL/kernels/CLConvolutionKernel.cpp @@ -31,9 +31,9 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp index 37c3241302..0b7e9aff53 100644 --- a/src/core/CL/kernels/CLCopyKernel.cpp +++ b/src/core/CL/kernels/CLCopyKernel.cpp @@ -23,15 +23,15 @@ */ #include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp index f828162177..2c99d46929 100644 --- a/src/core/CL/kernels/CLCropKernel.cpp +++ b/src/core/CL/kernels/CLCropKernel.cpp @@ -26,16 +26,11 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/WindowHelpers.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" 
-#include "arm_compute/core/utils/helpers/bit_ops.h" -#include "arm_compute/core/utils/helpers/tensor_transform.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index e8f12d5d9d..9ba3dc3d8f 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -25,12 +25,11 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp index 69730346fe..1514d906dc 100644 --- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 87067cf717..cb5d727e9b 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -25,12 +25,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include 
"arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp index 11297e7901..24f638f8c4 100644 --- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -25,13 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp index e6f909e884..50bb3b827c 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp @@ -24,9 +24,11 @@ #include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; diff --git 
a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index 066e9a5a40..7958230aac 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -23,19 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 0930fee712..5a0d2d0a62 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -23,19 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include 
"arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp index a538ab51cb..5a3a0ec435 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp @@ -25,17 +25,16 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp index 07f25a80cf..0ff3c520ba 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp @@ -23,19 +23,18 @@ */ #include 
"arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 72eac858ad..e653c59550 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -23,15 +23,16 @@ */ #include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp index ab5f9dab76..659a7cb209 
100644 --- a/src/core/CL/kernels/CLDerivativeKernel.cpp +++ b/src/core/CL/kernels/CLDerivativeKernel.cpp @@ -26,11 +26,9 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp index ae948314a3..1e59c349e7 100644 --- a/src/core/CL/kernels/CLDilateKernel.cpp +++ b/src/core/CL/kernels/CLDilateKernel.cpp @@ -26,8 +26,8 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index d5d808a80f..161b221e81 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -23,19 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include 
"arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index c8c7fb03b8..c7f9df0baa 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -24,9 +24,10 @@ #include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index ec33500f20..f0712f5863 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -24,9 +24,11 @@ #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp index a5eb79f73b..29a32979a3 100644 --- a/src/core/CL/kernels/CLErodeKernel.cpp +++ b/src/core/CL/kernels/CLErodeKernel.cpp @@ -26,8 +26,8 @@ #include 
"arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp index 30bca2f0f9..0478f550f9 100644 --- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp @@ -24,11 +24,11 @@ #include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp index 6c36338dae..7b17a227e1 100644 --- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp @@ -25,12 +25,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp index ac5f2b38c3..49fcbb6c7b 100644 --- 
a/src/core/CL/kernels/CLFFTScaleKernel.cpp +++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp @@ -24,11 +24,11 @@ #include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp index e71b47228e..ebdfd2741f 100644 --- a/src/core/CL/kernels/CLFastCornersKernel.cpp +++ b/src/core/CL/kernels/CLFastCornersKernel.cpp @@ -26,11 +26,9 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index 1ea654b5cc..e92619a242 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -27,13 +27,11 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/helpers/WindowHelpers.h" 
+#include "support/Cast.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp index 6bd1149612..dc1d33869f 100644 --- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp +++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp @@ -23,9 +23,10 @@ */ #include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp index 09f5f61a50..8884f3fe36 100644 --- a/src/core/CL/kernels/CLFloorKernel.cpp +++ b/src/core/CL/kernels/CLFloorKernel.cpp @@ -25,14 +25,14 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp index b582295f44..61e2b2700a 100644 --- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp @@ -25,12 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include 
"arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp index d30a9e5d18..cc98845e0f 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp @@ -23,19 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include @@ -137,7 +136,7 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe num_elems_processed_per_iteration_x = rhs_info.n0; num_elems_processed_per_iteration_y = lhs_info.m0; - win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, 
num_elems_processed_per_iteration_y)); output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); // Collapse along the Z direction diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp index 56b92a3d41..5469c89c30 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp @@ -23,19 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp index 4770329b7d..4a3ac2da81 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -23,19 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include 
"arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp index 6ef9fd2565..7ab96e5fa9 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp @@ -23,15 +23,13 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include @@ -148,7 +146,8 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I configure(CLKernelLibrary::get().get_compile_context(), mm_result, vector_sum_col, vector_sum_row, bias, k, a_offset, b_offset); } -void CLGEMMLowpOffsetContributionKernel::configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, +void CLGEMMLowpOffsetContributionKernel::configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, + 
const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset) { diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp index 6d3aa6fbf6..85285d6704 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -23,16 +23,15 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp index eae66413a6..ab1b5a2203 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp @@ -25,14 +25,14 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include 
"src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp index 430a84cfa0..ad5bac015b 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp @@ -25,14 +25,14 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 79888cdba2..8e4b291dbe 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -25,12 +25,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git 
a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 31a97ca32b..826b265dbf 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -23,10 +23,12 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/KernelDescriptors.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index c2dd92c0fd..aa69ed06d1 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -23,20 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/helpers/float_ops.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/float_ops.h" #include "support/StringSupport.h" #include @@ -310,7 +309,8 @@ void 
CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, is_interleaved_transposed, reshape_info, fp_mixed_precision, activation_info); } -void CLGEMMMatrixMultiplyKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, +void CLGEMMMatrixMultiplyKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, + float beta, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp index da57aa447f..cea147b10c 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp @@ -23,20 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/helpers/float_ops.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include 
"src/core/utils/helpers/float_ops.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp index b0f0e8a81f..eaf57086a3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp @@ -23,23 +23,22 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/helpers/float_ops.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLUtils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/float_ops.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index 0ae30ed30e..912c763ed5 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include 
"arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/helpers/float_ops.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLUtils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/float_ops.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index f52384593b..04aa061e98 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -23,15 +23,14 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp index 156a657f28..f2ad677976 100644 --- a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp +++ 
b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp @@ -23,19 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp index ce294646a0..d94e834d2c 100644 --- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp @@ -23,20 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include 
"src/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index 57759fc1c1..a8508bed2d 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/core/CL/kernels/CLGatherKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp index 08e7e27b3c..c9ed1ac0d7 100644 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp @@ -26,8 +26,8 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp index 0e20187d1c..2686e8b32e 100644 --- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp +++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp index 3108ad87d0..a2fcbbab78 100644 --- 
a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp index 7f618b294b..eaf5ea4880 100644 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp @@ -27,12 +27,10 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp index fbd2208894..6e14996732 100644 --- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp @@ -27,12 +27,10 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLHOG.h" #include 
"arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 08e670f5d2..19c4e579a0 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -23,17 +23,16 @@ */ #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index 22b2cfcbc5..1ae2599721 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -25,13 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Cast.h" #include 
"arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp index b8a4e8619d..a85429c1a0 100644 --- a/src/core/CL/kernels/CLHistogramKernel.cpp +++ b/src/core/CL/kernels/CLHistogramKernel.cpp @@ -27,12 +27,10 @@ #include "arm_compute/core/CL/ICLDistribution1D.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index c94e313b9a..76490f82f6 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -23,18 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git 
a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp index 2ad5233de8..e97b856456 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp @@ -25,12 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp index aff4bd9cea..82f6da85a5 100644 --- a/src/core/CL/kernels/CLIntegralImageKernel.cpp +++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp @@ -28,9 +28,8 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index a68d8db3c0..9936e29c5f 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -25,13 +25,14 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include 
"arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp index fae5fe2c8e..0fa2e703ec 100644 --- a/src/core/CL/kernels/CLLKTrackerKernel.cpp +++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp @@ -23,16 +23,15 @@ */ #include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp index 0da0d4ca1f..6e4c45eab7 100644 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp @@ -23,15 +23,13 @@ */ #include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff 
--git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp index ef8ebd52e5..dc130d0ff9 100644 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp @@ -27,11 +27,10 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp index 08c74642f4..a78996ddae 100644 --- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp @@ -23,19 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 33099c928d..5acc3ac3d6 100644 --- 
a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -25,14 +25,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index 5ecbb4b2a6..82a22a9f19 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -25,14 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp index 5f8c9e5a93..4b899502f9 100644 --- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp @@ -26,8 +26,8 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include 
"src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp index f591c2f6d5..186ed2a38c 100644 --- a/src/core/CL/kernels/CLMemsetKernel.cpp +++ b/src/core/CL/kernels/CLMemsetKernel.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp index 5f0e48dbb9..bf645f82e9 100644 --- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp @@ -23,14 +23,15 @@ */ #include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index 9bbda40782..634b58077a 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -28,7 +28,8 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include 
"src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp index 16e5113c62..0a8472bf04 100644 --- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp +++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp @@ -27,12 +27,11 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp index 958d94ce11..9c6d44b6c5 100644 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index 7d8e5db2b4..686e6f1b26 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -23,15 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include 
"arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/NormalizationHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 00bdac3441..407ce6626b 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -23,15 +23,16 @@ */ #include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index b2432a058d..45729738fb 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp index dc2d6fe4b4..620665791f 100644 --- 
a/src/core/CL/kernels/CLPermuteKernel.cpp +++ b/src/core/CL/kernels/CLPermuteKernel.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/core/CL/kernels/CLPermuteKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 229937ef31..a7bd4dad60 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -25,11 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 1771834aac..0570887b91 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -23,18 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include 
"arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp index 3429ef75d1..202e9fbb37 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp @@ -25,13 +25,14 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp index 2f676d30d1..ff6cc86103 100644 --- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 
f6b08884e7..983cbedc0f 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -23,16 +23,16 @@ */ #include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp index 3f2a904f58..ca6c6fad1a 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp @@ -23,18 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; diff --git 
a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index c2ed32653a..55fe5a5321 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index d46cdd78da..a4c30b63c2 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -24,9 +24,11 @@ #include "arm_compute/core/CL/kernels/CLRangeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Utils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 0ba63cc4e0..325e4b994c 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -23,17 +23,18 @@ */ #include 
"arm_compute/core/CL/kernels/CLReductionOperationKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index fe8c81a3b9..8d3f41b35f 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -23,15 +23,14 @@ */ #include "arm_compute/core/CL/kernels/CLRemapKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp index ab81a8fca3..ade7761b91 100644 --- a/src/core/CL/kernels/CLReorgLayerKernel.cpp +++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp @@ -26,12 +26,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include 
"arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index 3daf21a9a7..e08970992e 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -23,18 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp index 6546ced72e..f8240984d1 100644 --- a/src/core/CL/kernels/CLReverseKernel.cpp +++ b/src/core/CL/kernels/CLReverseKernel.cpp @@ -25,12 +25,12 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include 
"src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index 2e7ee36bcb..8233f210b4 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -23,16 +23,17 @@ */ #include "arm_compute/core/CL/kernels/CLScaleKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include "src/core/utils/ScaleUtils.h" diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp index 3172966b8f..1e33af3047 100644 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp @@ -26,11 +26,9 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp index dcac78cca3..d9a1044e1f 100644 --- a/src/core/CL/kernels/CLSelectKernel.cpp +++ b/src/core/CL/kernels/CLSelectKernel.cpp @@ -25,12 +25,13 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include 
"arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp index 86dcf22258..89e5207c44 100644 --- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp +++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp @@ -26,11 +26,9 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp index e010fdda75..3e765e47fb 100644 --- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp +++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp @@ -26,11 +26,9 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp index c2b4bec494..37ceaba502 100644 --- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp +++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp @@ -26,11 +26,9 @@ #include 
"arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index c7881b9f5f..5c0acda41a 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -23,18 +23,19 @@ */ #include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp index 3e0ac74f69..c6f70c3c09 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp @@ -24,9 +24,11 @@ #include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include 
"arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp index 4b6c1be8c2..2d46aade34 100644 --- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp @@ -24,9 +24,11 @@ #include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp index c283c440a3..5055065779 100644 --- a/src/core/CL/kernels/CLStackLayerKernel.cpp +++ b/src/core/CL/kernels/CLStackLayerKernel.cpp @@ -25,13 +25,14 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index f7b7290a3f..b632e05d84 100644 --- 
a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -24,10 +24,12 @@ #include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/utils/helpers/bit_ops.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/bit_ops.h" +#include "support/Cast.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp index bba152530c..43c8953363 100644 --- a/src/core/CL/kernels/CLTileKernel.cpp +++ b/src/core/CL/kernels/CLTileKernel.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/core/CL/kernels/CLTileKernel.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index a47d956620..bd910196e9 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -23,18 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include 
"arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/AccessWindowTranspose.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp index 101055001c..a4fc10f26a 100644 --- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp index e8da803628..95a7c1b875 100644 --- a/src/core/CL/kernels/CLWarpAffineKernel.cpp +++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" @@ -32,6 +31,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include 
"src/core/AccessWindowStatic.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp index dc7c359849..2fe1feb485 100644 --- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp +++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" @@ -32,6 +31,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 267957e51a..f69967265a 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -25,6 +25,8 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index 76100c2a63..27c650894c 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -23,16 +23,16 @@ */ #include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include 
"arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/helpers/tensor_info.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/tensor_info.h" +#include "support/Cast.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index 0377eb76b1..5ef2cc46ee 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -23,17 +23,16 @@ */ #include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/helpers/tensor_info.h" -#include "arm_compute/core/utils/misc/Cast.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/tensor_info.h" +#include "support/Cast.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index d40597fbb5..8b0aaf3227 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -25,13 +25,12 @@ #include 
"arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Cast.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp index 4a1c48a258..7fab208221 100644 --- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp @@ -23,10 +23,8 @@ */ #include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -36,6 +34,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp index c4c2b08a81..15c239e849 100644 --- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp @@ -23,10 +23,8 @@ */ #include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h" -#include 
"arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" @@ -34,6 +32,10 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp index 19f61b19b3..8ae0255319 100644 --- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp @@ -23,10 +23,8 @@ */ #include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -36,6 +34,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp index 132d5d1cd0..0c7588d740 100644 --- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp +++ b/src/core/CL/kernels/CLYOLOLayerKernel.cpp @@ -23,18 +23,20 @@ */ #include 
"arm_compute/core/CL/kernels/CLYOLOLayerKernel.h" +#include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/Types.h" #include "support/StringSupport.h" namespace arm_compute diff --git a/src/core/CPP/ICPPSimpleKernel.cpp b/src/core/CPP/ICPPSimpleKernel.cpp index 126bf548e2..9e4df5ec8a 100644 --- a/src/core/CPP/ICPPSimpleKernel.cpp +++ b/src/core/CPP/ICPPSimpleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { @@ -71,4 +72,4 @@ Status ICPPSimpleKernel::validate(const ITensorInfo *input, const ITensorInfo *o return Status{}; } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CPP/Validate.h b/src/core/CPP/Validate.h new file mode 100644 index 0000000000..9e95f72c3f --- /dev/null +++ b/src/core/CPP/Validate.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CPP_VALIDATE_H +#define ARM_COMPUTE_CPP_VALIDATE_H + +#include "arm_compute/core/Validate.h" + +namespace arm_compute +{ +/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_info Tensor info to validate. 
+ * + * @return Status + */ +inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, + const ITensorInfo *tensor_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); +#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16, + function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + return Status {}; +} + +/** Return an error if the data type of the passed tensor info is BFLOAT16 and BFLOAT16 support is not compiled in. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_info Tensor info to validate. + * + * @return Status + */ +inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, + const ITensorInfo *tensor_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); +#if !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::BFLOAT16, + function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above"); +#endif /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) */ + return Status {}; +} + +/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. 
+ * + * @return Status + */ +inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, + const ITensor *tensor) +{ + ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info())); + return Status{}; +} + +/** Return an error if the data type of the passed tensor is BFLOAT16 and BFLOAT16 support is not compiled in. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + * + * @return Status + */ +inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, + const ITensor *tensor) +{ + ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info())); + return Status{}; +} + +#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) + +#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) + +#define ARM_COMPUTE_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) + +#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) +} // namespace arm_compute +#endif /* ARM_COMPUTE_CPP_VALIDATE_H */ diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp 
b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp index 917a6ad08b..fb1754247c 100644 --- a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp +++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,8 +23,8 @@ */ #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp index a0cfb3ba8b..d7af9c9e7a 100644 --- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp +++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,17 +23,9 @@ */ #include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "support/Mutex.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp b/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp index ec03b72b6b..3166faba48 100644 --- a/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp +++ b/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. 
+ * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include diff --git a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp index 89e3058520..c1187ff2b3 100644 --- a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp +++ b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp @@ -23,10 +23,12 @@ */ #include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include namespace arm_compute diff --git a/src/core/CPP/kernels/CPPPermuteKernel.cpp b/src/core/CPP/kernels/CPPPermuteKernel.cpp index 1d1f0cd30e..054c7bf05a 100644 --- a/src/core/CPP/kernels/CPPPermuteKernel.cpp +++ b/src/core/CPP/kernels/CPPPermuteKernel.cpp @@ -23,13 +23,10 @@ */ #include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/CPP/kernels/CPPTopKVKernel.cpp b/src/core/CPP/kernels/CPPTopKVKernel.cpp index 7ba8d7cdd0..d2b54e412e 100644 --- a/src/core/CPP/kernels/CPPTopKVKernel.cpp +++ b/src/core/CPP/kernels/CPPTopKVKernel.cpp @@ -22,16 +22,14 @@ * SOFTWARE. 
*/ #include "arm_compute/core/CPP/kernels/CPPTopKVKernel.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" + #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + namespace arm_compute { namespace diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp index ff4ffb6124..7ef83fb2c4 100644 --- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp +++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp @@ -23,13 +23,8 @@ */ #include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp index 6609f457e2..fb31ac8377 100644 --- a/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp +++ b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp index f0a500398b..5e8accc95d 100644 --- a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp @@ -32,6 +32,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp index 1c02f41286..0173b81cf8 100644 --- a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp index 06c34863d7..f31c8ca156 100644 --- a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp @@ -34,6 +34,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp 
b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp index 3bd34acb92..9281ce5ffb 100644 --- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp index 4fe6484cf8..5781c564ea 100644 --- a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp @@ -24,7 +24,6 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; @@ -42,7 +44,7 @@ GCCol2ImKernel::GCCol2ImKernel() { } -void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output, +void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output, std::pair convolved_dims) { 
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp index 458cb639a3..3256f11e74 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp index cb70dae3ec..95d487b4dd 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -34,6 +33,9 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp index 302b21be0d..9ce8acea09 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp @@ -23,7 
+23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; @@ -44,7 +46,7 @@ GCDirectConvolutionLayerKernel::GCDirectConvolutionLayerKernel() } template -BorderSize GCDirectConvolutionLayerKernel::border_size() const +BorderSize GCDirectConvolutionLayerKernel::border_size() const { return _border_size; } @@ -70,8 +72,8 @@ void GCDirectConvolutionLayerKernel::configure(const IGCTensor *inp } // Get convolved dimensions - unsigned int owidth = 0; - unsigned int oheight = 0; + unsigned int owidth = 0; + unsigned int oheight = 0; std::tie(owidth, oheight) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info); TensorShape output_shape = input->info()->tensor_shape(); @@ -238,20 +240,20 @@ void GCDirectConvolutionLayerKernel::configure(const IGCTensor *inp num_elems_written_per_iteration_x = 4; #elif defined(PROCESS_4X_2Y_1Z) options.emplace("#define PROCESS_4X_2Y_1Z"); - num_elems_read_per_iteration_x = 4; - num_elems_read_per_iteration_y = 2; + num_elems_read_per_iteration_x = 4; + num_elems_read_per_iteration_y = 2; num_elems_written_per_iteration_x = 4; num_elems_written_per_iteration_y = 2; #elif defined(PROCESS_4X_3Y_1Z) options.emplace("#define PROCESS_4X_3Y_1Z"); - num_elems_read_per_iteration_x = 4; - num_elems_read_per_iteration_y = 3; + num_elems_read_per_iteration_x = 4; + num_elems_read_per_iteration_y = 3; num_elems_written_per_iteration_x = 4; 
num_elems_written_per_iteration_y = 3; #elif defined(PROCESS_4X_4Y_1Z) options.emplace("#define PROCESS_4X_4Y_1Z"); - num_elems_read_per_iteration_x = 4; - num_elems_read_per_iteration_y = 4; + num_elems_read_per_iteration_x = 4; + num_elems_read_per_iteration_y = 4; num_elems_written_per_iteration_x = 4; num_elems_written_per_iteration_y = 4; #elif defined(PROCESS_4X_2Y_2Z) diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp index 5c6722af6a..bda6599f86 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp index 3b3118bc3d..7ffcdd2f3f 100644 --- a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp index e0f7e957d8..d395759558 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" 
#include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp index c9eb4337fa..66fdde5473 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp index e8298bc327..daad70bba9 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp index dd03faf2df..2f69728b61 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp @@ -23,8 +23,6 @@ */ #include 
"arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -37,6 +35,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/AccessWindowTranspose.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp index 4190163694..1d6ef3d0e8 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowTranspose.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp index 64f2d63fec..c12dd38cb4 100644 --- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp @@ -24,7 +24,6 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" 
#include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -35,6 +34,9 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include @@ -91,7 +93,7 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const int stride_y = 0; std::tie(stride_x, stride_y) = conv_info.stride(); - _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height); + _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height); const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, @@ -109,9 +111,9 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const } build_opts.emplace("#define IM2COL_GENERIC"); - _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), - kernel_dims.width, kernel_dims.height, - conv_info, dilation); + _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), + kernel_dims.width, kernel_dims.height, + conv_info, dilation); _num_elems_processed_per_iteration = (input->info()->data_type() == DataType::F32) ? 
1 : 2; build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width)); diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp index 5fa1987bf1..c29d9fc4d5 100644 --- a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp index 6a79990484..971b540a83 100644 --- a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp index 45aa06cc2d..76559146ae 100644 --- a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp @@ -32,6 +32,8 @@ #include 
"arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp index a592c09cc0..13efd10532 100644 --- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp index cf10b92dd1..a0795c668f 100644 --- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" 
#include diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp index f4ed9617fa..39d586da72 100644 --- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp index d06be9b8a6..78b008484e 100644 --- a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -33,6 +32,9 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" using namespace arm_compute; diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp index 66b4a55bd8..3bec05b5f1 100644 --- 
a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include diff --git a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp index 9a430b43cb..bcdbfb60dc 100644 --- a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" diff --git a/src/core/Helpers.cpp b/src/core/Helpers.cpp index 6701ff4483..e692cc1e7c 100644 --- a/src/core/Helpers.cpp +++ b/src/core/Helpers.cpp @@ -25,162 +25,6 @@ namespace arm_compute { -Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) -{ - if(!skip_border) - { - border_size = BorderSize(0); - } - - const Coordinates &anchor = valid_region.anchor; - const TensorShape &shape = valid_region.shape; - - Window window; - - window.set(0, Window::Dimension( - // Skip the border left of the image - anchor[0] + border_size.left, - // Skip the border right of the image - // 
Make sure the window width is a multiple of the step size - anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast(shape[0]) - static_cast(border_size.left) - static_cast(border_size.right)), steps[0]), - steps[0])); - - size_t n = 1; - - if(anchor.num_dimensions() > 1) - { - window.set(1, Window::Dimension( - // Skip the border above the image - anchor[1] + border_size.top, - // Skip the border below the image - anchor[1] + border_size.top + ceil_to_multiple(std::max(0, static_cast(shape[1]) - static_cast(border_size.top) - static_cast(border_size.bottom)), steps[1]), - steps[1])); - - ++n; - } - - if(anchor.num_dimensions() > 2) - { - window.set(2, Window::Dimension(anchor[2], std::max(1, shape[2]), steps[2])); - - ++n; - } - - for(; n < anchor.num_dimensions(); ++n) - { - window.set(n, Window::Dimension(anchor[n], std::max(1, shape[n]))); - } - - for(; n < Coordinates::num_max_dimensions; ++n) - { - window.set(n, Window::Dimension(0, 1)); - } - - return window; -} - -Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size) -{ - const Coordinates &anchor = valid_region.anchor; - const TensorShape &shape = valid_region.shape; - - Window window; - - window.set(0, Window::Dimension( - // move the anchor to the start from the border - anchor[0] - border_size.left, - // move the anchor to include the right end border - // Make sure the window width is a multiple of the step size - anchor[0] - border_size.left + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), - steps[0])); - - size_t n = 1; - - if(anchor.num_dimensions() > 1) - { - window.set(1, Window::Dimension( - // Include the border above the image - anchor[1] - border_size.top, - // Include the border below the image - anchor[1] - border_size.top + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]), - steps[1])); - - ++n; - } - - if(anchor.num_dimensions() > 2) - { - window.set(2, 
Window::Dimension(0, std::max(1, shape[n]), steps[2])); - - ++n; - } - - for(; n < anchor.num_dimensions(); ++n) - { - window.set(n, Window::Dimension(anchor[n], std::max(1, shape[n]))); - } - - for(; n < Coordinates::num_max_dimensions; ++n) - { - window.set(n, Window::Dimension(0, 1)); - } - - return window; -} - -Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) -{ - if(skip_border) - { - border_size.top = 0; - border_size.bottom = 0; - } - else - { - border_size.left = 0; - border_size.right = 0; - } - - const Coordinates &anchor = valid_region.anchor; - const TensorShape &shape = valid_region.shape; - - Window window; - - window.set(0, Window::Dimension( - // Skip the border left of the image - anchor[0] + border_size.left, - // Skip the border right of the image - // Make sure the window width is a multiple of the step size - anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast(shape[0]) - static_cast(border_size.left) - static_cast(border_size.right)), steps[0]), - steps[0])); - - size_t n = 1; - - if(anchor.num_dimensions() > 1) - { - window.set(1, Window::Dimension( - // Skip the border above the image - anchor[1] - border_size.top, - // Skip the border below the image - anchor[1] + shape[1] + border_size.bottom, - 1)); - - ++n; - } - - for(; n < anchor.num_dimensions(); ++n) - { - window.set(n, Window::Dimension(anchor[n], std::max(1, shape[n]))); - } - - for(; n < Coordinates::num_max_dimensions; ++n) - { - window.set(n, Window::Dimension(0, 1)); - } - - return window; -} - ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape, InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined) { @@ -256,19 +100,4 @@ ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const Tens return valid_region; } - -PermutationVector 
get_permutation_vector_from_softmax_axis(size_t actual_axis) -{ - switch(actual_axis) - { - case 1: - return PermutationVector(1U, 0U, 2U, 3U); - case 2: - return PermutationVector(2U, 1U, 0U, 3U); - case 3: - return PermutationVector(3U, 1U, 2U, 0U); - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } -} } // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/NETracePoint.cpp b/src/core/NEON/NETracePoint.cpp index cb0dc1400a..4a6bffa54e 100644 --- a/src/core/NEON/NETracePoint.cpp +++ b/src/core/NEON/NETracePoint.cpp @@ -24,8 +24,8 @@ #include "arm_compute/core/TracePoint.h" #include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" -#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" +#include "src/core/NEON/kernels/convolution/common/convolution.hpp" #include "utils/TypePrinter.h" #include diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp index 3d4800fe15..acea0af02d 100644 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp index 7c85f698ae..73ef7eb66f 100644 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index d80aab7069..9616f4faca 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -23,15 +23,17 @@ */ #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 525e2866f2..7f1a35fb55 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp 
b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index a3da7508ab..49e503fac4 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index c7169d8932..65ac996f46 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Window.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index 50e46474b5..bda396662f 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" +#include 
"src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp index eb28ce0a8b..e24d7b6c0a 100644 --- a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp index caaa6c22e8..2d49ff825e 100644 --- a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp index 4da07f93b0..eed9b273ae 100644 --- a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp index 591acf50e1..f96117e860 100644 --- a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp index b0aec4078f..45d2b0a0db 100644 --- a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index 56444dcbc0..5a18e88321 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp index d5d03a9def..1177f6f1dd 100644 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,9 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include using namespace arm_compute; diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index 0278bb08e1..da33c1b1ea 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,6 +30,11 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.cpp b/src/core/NEON/kernels/NEChannelCombineKernel.cpp index 0de6c4326a..7bd380831b 100644 --- a/src/core/NEON/kernels/NEChannelCombineKernel.cpp +++ b/src/core/NEON/kernels/NEChannelCombineKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -33,6 +33,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.cpp b/src/core/NEON/kernels/NEChannelExtractKernel.cpp index 800c63606f..86245acd05 100644 --- a/src/core/NEON/kernels/NEChannelExtractKernel.cpp +++ b/src/core/NEON/kernels/NEChannelExtractKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp index 88cd0ae514..6d04d71534 100644 --- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index 6a07defd79..f3192370a6 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp index bc8c77543a..f933a2a898 100644 --- a/src/core/NEON/kernels/NEColorConvertKernel.cpp +++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEColorConvertHelper.inl" diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp index 97bb8ccb8a..8716cfd9b5 100644 --- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp +++ 
b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp index f40f1215d3..bd8ea30fb3 100644 --- a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp +++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEConvolutionKernel.cpp b/src/core/NEON/kernels/NEConvolutionKernel.cpp index 7103fa1618..69b65b2816 100644 --- a/src/core/NEON/kernels/NEConvolutionKernel.cpp +++ b/src/core/NEON/kernels/NEConvolutionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NECopyKernel.cpp b/src/core/NEON/kernels/NECopyKernel.cpp index 3d00139263..b299957b57 100644 --- a/src/core/NEON/kernels/NECopyKernel.cpp +++ b/src/core/NEON/kernels/NECopyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp index 7c65e71727..5fb55d95a9 100644 --- a/src/core/NEON/kernels/NECropKernel.cpp +++ b/src/core/NEON/kernels/NECropKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NECropKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" - #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/helpers/bit_ops.h" +#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/bit_ops.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp index cec0e1ce60..5628802783 100644 --- a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp +++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 6066326fec..b500268477 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -34,6 +34,8 @@ #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index ee23909bd6..259ece7c6f 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/SaturateCast.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" - #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/SaturateCast.h" using namespace arm_compute; diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp index 6465848999..403e7aac9f 100644 --- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp +++ 
b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp @@ -29,6 +29,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include #include diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index 0a34ee6a07..533b374594 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp" #include "src/core/NEON/wrapper/traits.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" namespace arm_compute @@ -48,7 +50,6 @@ constexpr size_t vector_size = 8; struct DepthwiseConvolutionRunInfo { -public: const size_t num_read_elements_per_iteration; const uint32_t x_start; const uint32_t x_end; diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 9352088b1f..2f3c6f431c 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include 
"arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDerivativeKernel.cpp b/src/core/NEON/kernels/NEDerivativeKernel.cpp index ad590e9f2b..5d3fc01bd2 100644 --- a/src/core/NEON/kernels/NEDerivativeKernel.cpp +++ b/src/core/NEON/kernels/NEDerivativeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp index c30dab22c6..cc781c699f 100644 --- a/src/core/NEON/kernels/NEDilateKernel.cpp +++ b/src/core/NEON/kernels/NEDilateKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index c22fa6a2b3..56cd6e62d0 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -26,8 +26,6 @@ #include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h" #include 
"src/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -36,7 +34,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp index 8c11574755..abaaf12e92 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,9 +30,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index f862d04b22..efe6161096 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp 
@@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp index 40430bdb81..8e4b7eda30 100644 --- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" namespace arm_compute diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp index 4b93c3b4d1..31b0f487d6 100644 --- a/src/core/NEON/kernels/NEErodeKernel.cpp +++ b/src/core/NEON/kernels/NEErodeKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp index d5b20d278d..d8036f2f60 100644 --- a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp +++ 
b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp index de8ba3f484..1b0af488a2 100644 --- a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp +++ b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp @@ -28,15 +28,16 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/wrapper/traits.h" +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include #include #include -#include "src/core/NEON/wrapper/traits.h" -#include "src/core/NEON/wrapper/wrapper.h" - namespace arm_compute { namespace diff --git a/src/core/NEON/kernels/NEFFTScaleKernel.cpp b/src/core/NEON/kernels/NEFFTScaleKernel.cpp index d99ff953fc..0cb8b84db8 100644 --- a/src/core/NEON/kernels/NEFFTScaleKernel.cpp +++ b/src/core/NEON/kernels/NEFFTScaleKernel.cpp @@ -29,6 +29,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp index 7b1d81e12c..99312f5134 100644 --- a/src/core/NEON/kernels/NEFastCornersKernel.cpp +++ b/src/core/NEON/kernels/NEFastCornersKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEFillArrayKernel.cpp b/src/core/NEON/kernels/NEFillArrayKernel.cpp index 6b22dadd08..93798db6c3 100644 --- a/src/core/NEON/kernels/NEFillArrayKernel.cpp +++ b/src/core/NEON/kernels/NEFillArrayKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index dbaec83d04..c1dd5cf81f 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp index 35ebc5b70b..e6b34b6165 100644 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" diff --git a/src/core/NEON/kernels/NEFloorKernel.cpp b/src/core/NEON/kernels/NEFloorKernel.cpp index 301dc7a422..48f964c6a2 100644 --- a/src/core/NEON/kernels/NEFloorKernel.cpp +++ b/src/core/NEON/kernels/NEFloorKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEFloorKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/floor/impl/list.h" #include "src/core/common/Registrars.h" diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index 00d251f79e..e353df1c39 100644 --- 
a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index 8b4ad0da23..2997c1d003 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp index f3ba2901cb..acc519012b 100644 --- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp @@ -31,6 +31,10 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include using namespace arm_compute; @@ -1052,5 +1056,3 @@ void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window, const ThreadInfo } } } // namespace arm_compute - - diff --git 
a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp index 4ac33d1e29..1c76926546 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,6 +31,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp index 8d0d7c26a3..6a7d225167 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,8 +31,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 023b798b9a..659c4105c1 100644 
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,7 +31,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index 68f16c5fc7..afa8cec76f 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NESymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index 2ef32c4e81..83416e03e9 
100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEAsymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index 8fc33dcc82..1e8aa0cc0a 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEAsymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp index 1494cd459c..566872f02c 100644 --- 
a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorInfo.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index bd931469a3..9aee26ca55 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index 6f74e3fc06..a9236890e3 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,9 +31,13 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include 
"arm_compute/core/utils/helpers/float_ops.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/float_ops.h" #include diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index a8adc45645..b9b4fe9e9c 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index 906e8a053e..193fe98c7b 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGatherKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -32,6 +31,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp index 18dd80e283..5ff5db7266 100644 --- a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp index 99b5d4b093..5bb3e76ded 100644 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp index 83d2877836..62cf414df2 100644 --- a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp +++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp index c3b105919b..483f204b04 100644 --- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp index 84bb59ef0e..00f4087cbc 100644 --- a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp index eb0d45000a..d5dfa4195d 100644 --- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index 340c694a7c..be68b9c44b 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index fc7b819f6a..a50712598a 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Window.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp index 0f8397f117..12d1bb8e7e 100644 --- a/src/core/NEON/kernels/NEHistogramKernel.cpp +++ b/src/core/NEON/kernels/NEHistogramKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 6eae0541aa..915ea75431 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp index 78acbc399d..7aa23de6eb 100644 --- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,8 +31,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git 
a/src/core/NEON/kernels/NEIntegralImageKernel.cpp b/src/core/NEON/kernels/NEIntegralImageKernel.cpp index 58ee3b4bea..5fc6ca65e3 100644 --- a/src/core/NEON/kernels/NEIntegralImageKernel.cpp +++ b/src/core/NEON/kernels/NEIntegralImageKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp index d99def53ba..a216981f0f 100644 --- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/wrapper/wrapper.h" #include diff --git a/src/core/NEON/kernels/NELKTrackerKernel.cpp b/src/core/NEON/kernels/NELKTrackerKernel.cpp index 533c241b9b..6567a8d206 100644 --- a/src/core/NEON/kernels/NELKTrackerKernel.cpp +++ b/src/core/NEON/kernels/NELKTrackerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index 9eafe18020..b8e6a6d763 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index a0c1dbc668..8d82e1abd6 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp index 821bf53817..87caf00477 100644 --- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index 914a21c0a0..c4e036a8b9 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index bcce843638..8ee9ff6f40 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp index 72225a4f43..86fcc30e91 100644 --- a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMemsetKernel.cpp b/src/core/NEON/kernels/NEMemsetKernel.cpp index 3870fa57f0..fd427cc8c5 100644 --- a/src/core/NEON/kernels/NEMemsetKernel.cpp +++ b/src/core/NEON/kernels/NEMemsetKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp index b1c2b1c376..f675c391ed 100644 --- a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -33,6 +33,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp index e956f9a8d0..e1691dc8ff 100644 --- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,6 +33,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp index f20e869272..31919ead03 100644 --- a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp +++ b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp index 3e4c6e29d3..9566ced768 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index 7b888266fb..1b72a3e277 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -23,17 +23,20 @@ */ #include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/NormalizationHelpers.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp index 1b52117bbe..ca9c5419e0 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp index 3f447f90b9..eab11ebfff 100644 --- 
a/src/core/NEON/kernels/NEPermuteKernel.cpp +++ b/src/core/NEON/kernels/NEPermuteKernel.cpp @@ -30,10 +30,12 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace { -#include "arm_compute/core/NEON/kernels/convolution/common/shims.hpp" +#include "src/core/NEON/kernels/convolution/common/shims.hpp" } // namespace namespace arm_compute diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index c5320b9dbf..0847cb1f23 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 397eae94ea..f9636dcb8d 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,9 +31,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include 
"src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp index 808b68a0d7..06a1f14e5f 100644 --- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp index 6a038f8f44..55585b4e00 100644 --- a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/NESymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h" diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 6d5202d6b5..990e4b67bc 100644 --- 
a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -31,8 +31,10 @@ #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" -#include "arm_compute/core/CPP/Validate.h" +#include "src/core/CPP/Validate.h" #include #include diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp index 955cdc2074..79f7888eba 100644 --- a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp index 6a960c74dc..a3171d9aa6 100644 --- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp @@ -23,10 +23,12 @@ */ #include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" 
#include diff --git a/src/core/NEON/kernels/NERangeKernel.cpp b/src/core/NEON/kernels/NERangeKernel.cpp index 7d8fbb1ec1..3466794b11 100644 --- a/src/core/NEON/kernels/NERangeKernel.cpp +++ b/src/core/NEON/kernels/NERangeKernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "arm_compute/core/Utils.h" diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index 9af7f2ab10..716b092396 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -32,9 +31,12 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/SaturateCast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/SaturateCast.h" #include "src/core/NEON/wrapper/wrapper.h" #include diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp index 2881161d7f..f698439507 100644 --- a/src/core/NEON/kernels/NERemapKernel.cpp +++ b/src/core/NEON/kernels/NERemapKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,13 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NERemapKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/ScaleHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include #include @@ -175,6 +178,8 @@ void NERemapKernel::remap_nearest(const Window &window) void NERemapKernel::remap_bilinear(const Window &window) { + using namespace scale_helpers; + // Don't increment in X and Y direction for the input tensor // A pointer to the start of this plane is needed as base for the precomputed offsets Window win_in(window); diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index 317bc25967..1c48a5c93d 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 23b349b443..7946812811 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -33,6 +31,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 0c44a7e0c9..2c081cb917 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 94f5a18102..e07fcad0ab 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -23,15 +23,17 @@ */ #include "arm_compute/core/NEON/kernels/NEScaleKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" 
-#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Rounding.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" - +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/ScaleHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/utils/ScaleUtils.h" +#include "support/Rounding.h" #include #include @@ -336,6 +338,8 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) void NEScaleKernel::scale_area_nchw_u8(const Window &window) { + using namespace scale_helpers; + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8); // Don't increment in width/height/channels for the input tensor diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp index dcc9362cf0..eb1dc65c0f 100644 --- a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp index 286b8a63c8..2f36db2ddb 100644 --- a/src/core/NEON/kernels/NESelectKernel.cpp +++ b/src/core/NEON/kernels/NESelectKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NESelectKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -31,7 +30,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "utils/TypePrinter.h" #include diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.cpp b/src/core/NEON/kernels/NESobel3x3Kernel.cpp index eb9d3c3020..1c7089b641 100644 --- a/src/core/NEON/kernels/NESobel3x3Kernel.cpp +++ b/src/core/NEON/kernels/NESobel3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.cpp b/src/core/NEON/kernels/NESobel5x5Kernel.cpp index fc8ccc803d..2421ea72ad 100644 --- a/src/core/NEON/kernels/NESobel5x5Kernel.cpp +++ b/src/core/NEON/kernels/NESobel5x5Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.cpp b/src/core/NEON/kernels/NESobel7x7Kernel.cpp index 95ab12b6cd..779d67a044 100644 --- a/src/core/NEON/kernels/NESobel7x7Kernel.cpp +++ b/src/core/NEON/kernels/NESobel7x7Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index e71818f213..13f0a54275 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,10 +30,14 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/SaturateCast.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/SaturateCast.h" #include #include diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp index ccad92a685..3293466979 100644 --- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp @@ -29,6 +29,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include #include diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp 
b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp index 2667611d2c..7c9cc4996b 100644 --- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp @@ -29,6 +29,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include #include diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index 1d44be60a0..ad7f1b1300 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 243a60f249..13b2cb5a10 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -23,16 +23,17 @@ */ #include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" - #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/helpers/bit_ops.h" +#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include 
"src/core/utils/helpers/bit_ops.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp index 9e8ec5c106..aad440b120 100644 --- a/src/core/NEON/kernels/NEThresholdKernel.cpp +++ b/src/core/NEON/kernels/NEThresholdKernel.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NETileKernel.cpp b/src/core/NEON/kernels/NETileKernel.cpp index cc7655a479..99651c8b8a 100644 --- a/src/core/NEON/kernels/NETileKernel.cpp +++ b/src/core/NEON/kernels/NETileKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 7118e45f1e..6037810a44 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/AccessWindowTranspose.h" +#include "src/core/helpers/AutoConfiguration.h" +#include 
"src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp index 69324c1693..129c83c695 100644 --- a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,7 +30,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp index d8191dce53..891304f02c 100644 --- a/src/core/NEON/kernels/NEWarpKernel.cpp +++ b/src/core/NEON/kernels/NEWarpKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEWarpKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -31,6 +30,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/ScaleHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include @@ -184,7 +187,7 @@ void NEWarpAffineKernel::warp_undefined(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -271,7 +274,7 @@ void NEWarpAffineKernel::warp_constant(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -386,7 +389,7 @@ void NEWarpAffineKernel::warp_replicate(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -519,7 +522,7 @@ void NEWarpPerspectiveKernel::warp_undefined(const Window &window *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), 
stride, xn, yn); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -620,7 +623,7 @@ void NEWarpPerspectiveKernel::warp_constant(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -752,7 +755,7 @@ void NEWarpPerspectiveKernel::warp_replicate(const Window &window *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index 6a74914ff7..c7fa2d2365 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -25,6 +25,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index d12b10c69e..90afbd6a19 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Window.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git 
a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp index bfe97bfbdb..211ebdec90 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp @@ -23,16 +23,18 @@ */ #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/NEON/kernels/convolution/common/utils.hpp" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/MemorySupport.h" #include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp" diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index 94df4f6952..bf5d77fc43 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -25,8 +25,8 @@ #define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H #include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp" -#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp" +#include "src/core/NEON/kernels/convolution/common/convolution.hpp" +#include "src/core/NEON/kernels/convolution/common/tensor.hpp" #include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp" diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp 
b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 591aa1e5e6..48c0616b35 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h" diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp index b071be3749..760274dba1 100644 --- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp +++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h new file mode 100644 index 0000000000..030f1aad12 --- /dev/null +++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_INEGEMMWRAPPERKERNEL_H +#define SRC_INEGEMMWRAPPERKERNEL_H + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Common interface for all the arm_gemm Gemms + */ +class INEGEMMWrapperKernel : public INEKernel +{ +public: + /** Parameters defining the dimensions of the matrices being multiplied */ + struct Params + { + unsigned int M{ 0 }; /**< Rows in output matrix C (and input matrix A). */ + unsigned int N{ 0 }; /**< Columns in output matrix C (and input matrix B). */ + unsigned int K{ 0 }; /**< Columns of input matrix A (= rows of input matrix B). */ + unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */ + unsigned int multis{ 0 }; /**< Number of "multi" GEMMs (unique A, B and C). 
*/ + }; + + static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info); + + /** Constructor */ + INEGEMMWrapperKernel(); + /** Prevent instances of this class from being copied */ + INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete; + /** Prevent instances of this class from being copied */ + INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete; + /** Allow instances of this class to be moved */ + INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default; + /** Allow instances of this class to be moved */ + INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p a. + * @param[in] alpha Scalar multiplier to apply to AB matrix product. + * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. + * @param[in] gemm_info GEMM meta-data + */ + void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +protected: + /** Called as part of configure() after _a, _b, _c and _params have been set. + * + * @param[in] alpha Scalar multiplier to apply to AB matrix product. + * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. + * + * @return A 3D execution window. + */ + virtual Window configure_internal(float alpha, float beta) = 0; + + /** Run the kernel from the start to the end offset in window. 
+ * + * @param[in] window Window to use for the iteration + * @param[in] start_offset Where to start iterating from (In Window coordinates) + * @param[in] end_offset Where to stop iterating (In Window coordinates). + * @param[in] info Info about executing thread and CPU. + */ + virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0; + + const ITensor *_a; + const ITensor *_b; + ITensor *_c; + Params _params; + GEMMInfo _gemm_info; + +private: + Window _window3d; + TensorShape _window_shape; +}; + +} // namespace arm_compute + +#endif /* SRC_INEGEMMWRAPPERKERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h new file mode 100644 index 0000000000..a2f7e3bd59 --- /dev/null +++ b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H +#define SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" + +#include "src/core/NEON/kernels/convolution/depthwise/depthwise.hpp" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** This class is a wrapper for the depthwise convolution assembly kernels. */ +class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthwiseConvolutionAssemblyKernelWrapper"; + } + + /** Default constructor */ + NEDepthwiseConvolutionAssemblyKernelWrapper() + : _kernel(nullptr) + { + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; + /** Default Move Constructor. */ + NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; + /** Default move assignment operator */ + NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; + + /** Initialise the kernel's input and output. + * + * @param[in] kernel Pointer to an assembly kernel implementation. 
+ */ + void configure(depthwise::IDepthwiseConvolution *kernel) + { + ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast(kernel))); + _kernel = kernel; + Window win; + win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1)); + INEKernel::configure(win); + } + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override + { + ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast(_kernel))); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + auto first = window.x().start(); + auto last = window.x().end(); + _kernel->run(first, last, info.thread_id); + } + +private: + depthwise::IDepthwiseConvolution *_kernel; +}; +} // namespace arm_compute +#endif /* SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/arm_gemm_local.hpp b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp new file mode 100644 index 0000000000..4715f2500a --- /dev/null +++ b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +/* This file is used to configure integration-specific aspects of arm_gemm into ACL */ + +#include "arm_compute/core/CPP/CPPTypes.h" + +namespace arm_gemm +{ +using CPUModel = arm_compute::CPUModel; +using CPUInfo = arm_compute::CPUInfo; +} // namespace arm_gemm diff --git a/src/core/NEON/kernels/convolution/common/activation.hpp b/src/core/NEON/kernels/convolution/common/activation.hpp new file mode 100644 index 0000000000..0c9b7c1368 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/activation.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +namespace neon_convolution_kernels +{ + +enum class ActivationFunction +{ + None, + ReLU, + ReLU6, +}; + +} diff --git a/src/core/NEON/kernels/convolution/common/alloc.hpp b/src/core/NEON/kernels/convolution/common/alloc.hpp new file mode 100644 index 0000000000..7be3cdaaf5 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/alloc.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2017 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once + +#ifdef ALLOC_ALIGN +#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x) +#else +#define ALLOCATE(x) malloc(x) +#endif diff --git a/src/core/NEON/kernels/convolution/common/arm.hpp b/src/core/NEON/kernels/convolution/common/arm.hpp new file mode 100644 index 0000000000..b19bf98252 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/arm.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2017 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** Sets the macro __arm_any__ if compiling for Aarch32 or Aarch64. + * Includes `arm_neon.h` if compiling for either architecture. 
+ */ + +#ifdef __arm__ +#define __arm_any__ +#endif // __arm__ + +#ifdef __aarch64__ +#define __arm_any__ +#endif // __aarch64__ + +#ifdef __arm_any__ +#include +#endif // __arm_any__ diff --git a/src/core/NEON/kernels/convolution/common/convolution.hpp b/src/core/NEON/kernels/convolution/common/convolution.hpp new file mode 100644 index 0000000000..b1413527c3 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/convolution.hpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once + +enum PaddingType { + PADDING_SAME, PADDING_VALID +}; diff --git a/src/core/NEON/kernels/convolution/common/padding.hpp b/src/core/NEON/kernels/convolution/common/padding.hpp new file mode 100644 index 0000000000..b6f95872c0 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/padding.hpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include + +// Utilities for copying tensor tiles and adding/removing padding. +namespace padding +{ + +/* Copy a tile and apply padding to the output copy. 
+ */ +template +void copy_and_pad_tile( + unsigned int tile_rows, + unsigned int tile_cols, + unsigned int n_channels, + const T *inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + T* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride, + unsigned int pad_top, + unsigned int pad_left, + unsigned int pad_bottom, + unsigned int pad_right, + T pad_value=static_cast(0) +); + +/** Copy a tile and remove padding elements in the output. + */ +template +class CopyCropped +{ + public: + static void execute( + size_t size, // Amount of data to copy + const void *inptr, + size_t in_row_stride, + size_t in_col_stride, + void *outptr, + size_t out_row_stride, + size_t out_col_stride, + unsigned int pad_top, + unsigned int pad_left, + unsigned int pad_bottom, + unsigned int pad_right + ); +}; + +template +void crop_and_copy_tile( + unsigned int tile_rows, + unsigned int tile_cols, + unsigned int n_channels, + const T *inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + T *outptr, + unsigned int out_row_stride, + unsigned int out_col_stride, + unsigned int crop_top, + unsigned int crop_left, + unsigned int crop_bottom, + unsigned int crop_right +); + +} diff --git a/src/core/NEON/kernels/convolution/common/perf.h b/src/core/NEON/kernels/convolution/common/perf.h new file mode 100644 index 0000000000..fbae4dcdfa --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/perf.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2018 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +/* Prototypes from perf.c */ + +void start_counter(int fd); +long long get_counter(int fd); +long long stop_counter(int fd); +int open_instruction_counter(void); +int open_cycle_counter(void); diff --git a/src/core/NEON/kernels/convolution/common/qasymm8.hpp b/src/core/NEON/kernels/convolution/common/qasymm8.hpp new file mode 100644 index 0000000000..88ef7327c0 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/qasymm8.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#include + +namespace qasymm8 +{ + +struct QAsymm8Params +{ + uint8_t quantize(float value) const; + float dequantize(uint8_t value) const; + + uint8_t offset; + float scale; +}; + +struct QAsymm8RescaleParams +{ + static QAsymm8RescaleParams make_rescale_params( + const QAsymm8Params& weight_quant, + const QAsymm8Params& input_quant, + const QAsymm8Params& output_quant + ); + + QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); + + const int32_t shift, multiplier; + const float rescale; +}; + +} diff --git a/src/core/NEON/kernels/convolution/common/qsymm8.hpp b/src/core/NEON/kernels/convolution/common/qsymm8.hpp new file mode 100644 index 0000000000..726a02ccfd --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/qsymm8.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once +#include +#include +#include "qasymm8.hpp" + + +namespace qsymm8 { + +struct QSymm8Params { + int8_t quantize(float value) const; + float dequantize(int8_t value) const; + + float scale; +}; + +struct QSymm8RescaleParams { + static QSymm8RescaleParams + make_rescale_params(const QSymm8Params &weight_quant, + const QSymm8Params &input_quant, + const QSymm8Params &output_quant); + + QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); + + const int32_t shift, multiplier; + const float rescale; +}; + +struct QSymm8PerChannelParams { + int8_t quantize(float value, float scale) const; + float dequantize(int8_t value, float scale) const; + + std::vector scales; +}; + +struct QSymm8PerChannelRescaleParams { + static QSymm8PerChannelRescaleParams + make_rescale_params(const QSymm8PerChannelParams &weight_quant, + const QSymm8PerChannelParams &input_quant, + const QSymm8PerChannelParams &output_quant); + + static QSymm8PerChannelRescaleParams + make_rescale_params(const QSymm8PerChannelParams &weight_quant, + const qasymm8::QAsymm8Params &input_quant, + const qasymm8::QAsymm8Params &output_quant); + + QSymm8PerChannelRescaleParams(std::vector& shift, std::vector& multiplier, std::vector& rescale); + + std::vector shifts, multipliers; + std::vector rescales; +}; + +} // namespace qsymm8 diff --git a/src/core/NEON/kernels/convolution/common/shims.hpp b/src/core/NEON/kernels/convolution/common/shims.hpp new file mode 100644 index 0000000000..310bd47b82 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/shims.hpp @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2017 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#ifndef DOXYGEN_SKIP_THIS +#include +#endif /* DOXYGEN_SKIP_THIS */ +#include "arm.hpp" + +namespace reorder { +/** Re-order a tensor from NCHW format to NHWC. + * + * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. + * + * @param[in] in Input tensor in NCHW format. + * @param[out] out Output tensor, to be written in NHWC format. + * @param n_batches Number of batches in the tensors. + * @param n_channels Number of channels in the tensors + * @param n_rows Height of the tensor + * @param n_cols Width of the tensor + * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`. + * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`. 
+ * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`. + * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`. + * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`. + * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`. + */ +template +inline void nchw_to_nhwc( + const T* const in, + T* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride=0, + int in_channel_stride=0, + int in_row_stride=0, + int out_batch_stride=0, + int out_row_stride=0, + int out_col_stride=0 +); + +/** Re-order a tensor from NHWC format to NCHW. + * + * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. + * + * @param[in] in Input tensor in NHWC format. + * @param[out] out Output tensor, to be written in NCHW format. + * @param n_batches Number of batches in the tensors. + * @param n_rows Height of the tensor + * @param n_cols Width of the tensor + * @param n_channels Number of channels in the tensors + * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`. + * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`. + * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`. + * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`. + * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`. + * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`. 
+ */ +template +inline void nhwc_to_nchw( + const T* const in, // Input data in NHWC form + T* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride=0, + int in_row_stride=0, + int in_col_stride=0, + int out_batch_stride=0, + int out_channel_stride=0, + int out_row_stride=0 +); + +/** Re-order a weight tensor from [Output feature map x Input feature map x + * Height x Width] format to [Height x Width x Input feature map x Output + * feature map] format. + */ +template +inline void ofm_ifm_h_w_to_h_w_ifm_ofm( + const T* const in, // Input in [Output x Input x Height x Width] form + T* const out, // Output in [Height x Width x Input x Output] form + const int n_output_feature_maps, + const int n_input_feature_maps, + const int n_rows, + const int n_cols, + int in_output_feature_map_stride=0, + int in_input_feature_map_stride=0, + int in_row_stride=0, + int out_row_stride=0, + int out_col_stride=0, + int out_input_feature_map_stride=0 +); + +/** Re-order a weight tensor from [Height x Width x Input feature map x Output + * feature map] format to [Output feature map x Input feature map x Height x + * Width] format. 
+ */ +template +inline void h_w_ifm_ofm_to_ofm_ifm_h_w( + const T* const in, // Input in [Height x Width x Input x Output] form + T* const out, // Output in [Output x Input x Height x Width] form + const int n_rows, + const int n_cols, + const int n_input_feature_maps, + const int n_output_feature_maps, + int in_row_stride=0, + int in_col_stride=0, + int in_input_feature_map_stride=0, + int out_output_feature_map_stride=0, + int out_input_feature_map_stride=0, + int out_row_stride=0 +); + +/*****************************************************************************/ +/* 32-bit implementation : NCHW -> NHWC + */ +template <> +inline void nchw_to_nhwc( + const int32_t* const in, + int32_t* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + typedef int32_t T; + + // Fill in the stride values + in_row_stride = (in_row_stride) ? in_row_stride : n_cols; + in_channel_stride = (in_channel_stride) ? in_channel_stride + : n_rows * in_row_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_channels * in_channel_stride; + + out_col_stride = (out_col_stride) ? out_col_stride : n_channels; + out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_rows * out_row_stride; + + // Perform the re-ordering + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in_batch + i*in_row_stride; + T* const out_row = out_batch + i*out_row_stride; + + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 4; j += 4, j_remaining -= 4) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 4; c += 4, c_remaining -= 4) + { + // Read 4 channels worth of 4 columns, then zip to produce 4 columns + // worth of 4 channels. + int32x4_t channel_pixels[4]; + channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j); + channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j); + channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j); + channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j); + + const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]); + const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]); + const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); + const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); + + vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]); + vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]); + vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]); + vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 4; _j++) + { + const T* const in_col = in_row + j + _j; + T* const out_col = out_row + (j + _j)*out_col_stride; + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + for (; j_remaining >= 2; j += 2, j_remaining -= 2) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 2; c += 2, c_remaining -= 2) + { + // Read 2 channels worth of 2 columns, 
then zip to produce 2 columns + // worth of 2 channels. + int32x2_t channel_pixels[2]; + channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j); + channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j); + + const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]); + + vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]); + vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 2; _j++) + { + const T* const in_col = in_row + j + _j; + T* const out_col = out_row + (j + _j)*out_col_stride; + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T* const in_col = in_row + j; + T* const out_col = out_row + j*out_col_stride; + + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + } +} + +template <> +inline void nchw_to_nhwc( + const uint32_t* const in, + uint32_t* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + nchw_to_nhwc( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_channels, n_rows, n_cols, + in_batch_stride, in_channel_stride, in_row_stride, + out_batch_stride, out_row_stride, out_col_stride + ); +} + +template <> +inline void nchw_to_nhwc( + const float* const in, + float* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + nchw_to_nhwc( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_channels, n_rows, n_cols, + 
in_batch_stride, in_channel_stride, in_row_stride, + out_batch_stride, out_row_stride, out_col_stride + ); +} + +/*****************************************************************************/ +/* Generic implementation : NCHW -> NHWC + */ +template +inline void nchw_to_nhwc( + const T* const in, + T* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + // Fill in the stride values + in_row_stride = (in_row_stride) ? in_row_stride : n_cols; + in_channel_stride = (in_channel_stride) ? in_channel_stride + : n_rows * in_row_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_channels * in_channel_stride; + + out_col_stride = (out_col_stride) ? out_col_stride : n_channels; + out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_rows * out_row_stride; + + // Perform the re-ordering + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in_batch + i*in_row_stride; + T* const out_row = out_batch + i*out_row_stride; + + for (int j = 0; j < n_cols; j++) + { + const T* const in_col = in_row + j; + T* const out_col = out_row + j*out_col_stride; + + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + } +} + +/*****************************************************************************/ +/* 32-bit implementation : NHWC -> NCHW + */ +template <> +inline void nhwc_to_nchw( + const int32_t* const in, // Input data in NHWC form + int32_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + typedef int32_t T; + + // Fill in stride values + in_col_stride = (in_col_stride) ? in_col_stride : n_channels; + in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_rows * in_row_stride; + + out_row_stride = (out_row_stride) ? out_row_stride : n_cols; + out_channel_stride = (out_channel_stride) ? out_channel_stride + : n_rows * out_row_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_channels * out_channel_stride; + + // Perform the re-ordering + // For every batch + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + // For every row + for (int i = 0; i < n_rows; i++) + { + const T* const in_i = in_batch + i*in_row_stride; + T* const out_i = out_batch + i*out_row_stride; + + // For every column, beginning with chunks of 4 + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 4; j += 4, j_remaining -=4) + { + // For every channel, beginning with chunks of 4 + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 4; c += 4, c_remaining -= 4) + { + // Read 4 columns worth of 4 channels then zip to produce 4 channels + // worth of 4 columns. + int32x4_t pixel_channels[4]; + pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c); + pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c); + pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c); + + const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]); + const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]); + const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); + const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); + + vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); + vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); + vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); + vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 4; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + for (; j_remaining >= 2; j += 2, 
j_remaining -=2) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 2; c += 2, c_remaining -= 2) + { + // Read 2 columns worth of 2 channels then zip to produce 2 channels + // worth of 2 columns. + int32x2_t pixel_channels[2]; + pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c); + + const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]); + + vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]); + vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 2; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T* const in_j = in_i + j*in_col_stride; + T* const out_j = out_i + j; + + // For every channel + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + } +} + +template <> +inline void nhwc_to_nchw( + const uint32_t* const in, // Input data in NHWC form + uint32_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + // Redirect to generic 32-bit implementation + nhwc_to_nchw( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_rows, n_cols, n_channels, + in_batch_stride, in_row_stride, in_col_stride, + out_batch_stride, out_channel_stride, out_row_stride + ); +} + +template <> +inline void nhwc_to_nchw( + const float* const in, // Input data in NHWC 
form + float* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + // Redirect to generic 32-bit implementation + nhwc_to_nchw( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_rows, n_cols, n_channels, + in_batch_stride, in_row_stride, in_col_stride, + out_batch_stride, out_channel_stride, out_row_stride + ); +} + +/*****************************************************************************/ +/* Generic implementation : NHWC -> NCHW + */ +template +inline void nhwc_to_nchw( + const T* const in, // Input data in NHWC form + T* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + // Fill in stride values + in_col_stride = (in_col_stride) ? in_col_stride : n_channels; + in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_rows * in_row_stride; + + out_row_stride = (out_row_stride) ? out_row_stride : n_cols; + out_channel_stride = (out_channel_stride) ? out_channel_stride + : n_rows * out_row_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_channels * out_channel_stride; + + // Perform the re-ordering + // For every batch + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + // For every row + for (int i = 0; i < n_rows; i++) + { + const T* const in_i = in_batch + i*in_row_stride; + T* const out_i = out_batch + i*out_row_stride; + + // For every column + for (int j = 0; j < n_cols; j++) + { + const T* const in_j = in_i + j*in_col_stride; + T* const out_j = out_i + j; + + // For every channel + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + } +} + +/*****************************************************************************/ +/* Generic weight re-order implementation. + */ +template +inline void ofm_ifm_h_w_to_h_w_ifm_ofm( + const T* const in, // Input in [Output x Input x Height x Width] form + T* const out, // Output in [Height x Width x Input x Output] form + const int n_output_feature_maps, + const int n_input_feature_maps, + const int n_rows, + const int n_cols, + int in_output_feature_map_stride, + int in_input_feature_map_stride, + int in_row_stride, + int out_row_stride, + int out_col_stride, + int out_input_feature_map_stride +) +{ + // Fill in stride values + in_row_stride = (in_row_stride) + ? in_row_stride + : n_cols; + in_input_feature_map_stride = (in_input_feature_map_stride) + ? in_input_feature_map_stride + : n_rows * in_row_stride; + in_output_feature_map_stride = (in_output_feature_map_stride) + ? in_output_feature_map_stride + : n_input_feature_maps * in_input_feature_map_stride; + + out_input_feature_map_stride = (out_input_feature_map_stride) + ? out_input_feature_map_stride + : n_output_feature_maps; + out_col_stride = (out_col_stride) + ? 
out_col_stride + : n_input_feature_maps * out_input_feature_map_stride; + out_row_stride = (out_row_stride) + ? out_row_stride + : n_cols * out_col_stride; + + // Perform the re-ordering + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in + i * in_row_stride; + T* out_row = out + i * out_row_stride; + + for (int j = 0; j < n_cols; j++) + { + const T* const in_col = in_row + j; + T* const out_col = out_row + j * out_col_stride; + + for (int ifm = 0; ifm < n_input_feature_maps; ifm++) + { + const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; + T* const out_ifm = out_col + ifm * out_input_feature_map_stride; + + for (int ofm = 0; ofm < n_output_feature_maps; ofm++) + { + const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride; + T* const out_ofm = out_ifm + ofm; + *(out_ofm) = *(in_ofm); + } + } + } + } +} + +/*****************************************************************************/ +/* Generic weight re-order implementation. + */ +template +inline void h_w_ifm_ofm_to_ofm_ifm_h_w( + const T* const in, // Input in [Height x Width x Input x Output] form + T* const out, // Output in [Output x Input x Height x Width] form + const int n_rows, + const int n_cols, + const int n_input_feature_maps, + const int n_output_feature_maps, + int in_row_stride, + int in_col_stride, + int in_input_feature_map_stride, + int out_output_feature_map_stride, + int out_input_feature_map_stride, + int out_row_stride +) +{ + // Fill in the stride values + in_input_feature_map_stride = (in_input_feature_map_stride) + ? in_input_feature_map_stride + : n_output_feature_maps; + in_col_stride = (in_col_stride) + ? in_col_stride + : n_input_feature_maps * in_input_feature_map_stride; + in_row_stride = (in_row_stride) + ? in_row_stride + : n_cols * in_col_stride; + + out_row_stride = (out_row_stride) + ? out_row_stride + : n_cols; + out_input_feature_map_stride = (out_input_feature_map_stride) + ? 
out_input_feature_map_stride + : n_rows * out_row_stride; + out_output_feature_map_stride = (out_output_feature_map_stride) + ? out_output_feature_map_stride + : n_input_feature_maps * out_input_feature_map_stride; + + // Perform the re-ordering + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in + i * in_row_stride; + T* const out_row = out + i * out_row_stride; + + for (int j = 0; j < n_cols; j++) + { + const T* const in_col = in_row + j * in_col_stride; + T* const out_col = out_row + j; + + for (int ifm = 0; ifm < n_input_feature_maps; ifm++) + { + const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; + T* const out_ifm = out_col + ifm * out_input_feature_map_stride; + + for (int ofm = 0; ofm < n_output_feature_maps; ofm++) + { + const T* const in_ofm = in_ifm + ofm; + T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride; + *(out_ofm) = *(in_ofm); + } + } + } + } +} + +} // namespace reorder diff --git a/src/core/NEON/kernels/convolution/common/tensor.hpp b/src/core/NEON/kernels/convolution/common/tensor.hpp new file mode 100644 index 0000000000..7738cdb349 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/tensor.hpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2017-2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#include +#include + +#include "alloc.hpp" + +enum TensorOrder +{ + NHWC, ///< [Batch x Height x Width x Channels] + NCHW, ///< [Batch x Channels x Height x Width] +}; + +struct Tensor4DShape +{ + int n_batches, n_rows, n_cols, n_channels; + TensorOrder ordering; + + // Create a new tensor with the default (NHWC) ordering + inline Tensor4DShape( + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + const TensorOrder ordering=NHWC + ) : n_batches(n_batches), + n_rows(n_rows), + n_cols(n_cols), + n_channels(n_channels), + ordering(ordering) + { + } + + inline int index(const int n, const int i, const int j, const int c) const + { + if (this->ordering == NHWC) + { + return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c; + } + else // NCHW + { + return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j; + } + } + + inline int size() const + { + return n_batches * n_rows * n_cols * n_channels; + } + + inline bool TestEq(const Tensor4DShape& other) const + { + return (n_batches == other.n_batches && + n_rows == other.n_rows && + n_cols == other.n_cols && + n_channels == other.n_channels); + } +}; + + +enum WeightOrder +{ + HWIO, ///< [Height x Width x Input channels x Output channels] + OIHW, ///< [Output channels x Input channels x Height x Width] +}; + +struct KernelShape +{ + int n_output_channels, n_rows, n_cols, n_input_channels; + WeightOrder ordering; + + inline KernelShape( + const int 
n_output_channels, + const int n_rows, + const int n_cols, + const int n_input_channels, + const WeightOrder ordering=HWIO + ) : n_output_channels(n_output_channels), + n_rows(n_rows), + n_cols(n_cols), + n_input_channels(n_input_channels), + ordering(ordering) + { + } + + inline int index(int oc, int i, int j, int ic) const + { + if (this->ordering == HWIO) + { + return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc; + } + else // OIHW + { + return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j; + } + } + + inline int size(void) const + { + return n_output_channels * n_rows * n_cols * n_input_channels; + } +}; + + +template +class Tensor4D final +{ + public: + Tensor4D(ShapeT shape) : + shape(shape), + _data(reinterpret_cast(ALLOCATE(size_bytes()))) + { + Clear(); + } + + Tensor4D(const Tensor4D&) = delete; + Tensor4D operator=(const Tensor4D&) = delete; + + ~Tensor4D() { + free(_data); + } + + inline T* ptr() const { + return _data; + } + + inline size_t size_bytes() const { + return shape.size() * sizeof(T); + } + + /* Extract an element of the tensor. + * + * If the shape is a Tensor4DShape then the index is given as batch, row, + * column and channel. If the shape is a KernelShape then the index is + * given as output channel, row, column and input channel. + */ + inline T& element(const int a, const int b, const int c, const int d) const + { + return _data[shape.index(a, b, c, d)]; + } + + inline void Clear() { + Fill(static_cast(0)); + } + + inline void Fill(T val) { + for (int i = 0; i < shape.size(); i++) + _data[i] = val; + } + + const ShapeT shape; + + private: + T* const _data; +}; diff --git a/src/core/NEON/kernels/convolution/common/tensor_utils.hpp b/src/core/NEON/kernels/convolution/common/tensor_utils.hpp new file mode 100644 index 0000000000..82619f4799 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/tensor_utils.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once +#include "tensor.hpp" + +// Methods to print tensors and weights +void PrintTensor(const Tensor4D& tensor); +void PrintWeights(const Tensor4D& weights); + +// Test the equivalence of two tensors +// Counts the instances that |a - b|/|a| > max_err +bool CmpTensors( + const Tensor4D& a, + const Tensor4D& b, + const float max_err=0.0f +); + +// Fill the tensor with a test pattern +void TestPattern(Tensor4D& tensor); +void TestPattern(Tensor4D& weights); + +// Fill the tensor with random values +void Randomise(Tensor4D& tensor, const int seed=0); +void Randomise(Tensor4D& weights, const int seed=0); diff --git a/src/core/NEON/kernels/convolution/common/utils.hpp b/src/core/NEON/kernels/convolution/common/utils.hpp new file mode 100644 index 0000000000..b7a9517c65 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/utils.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017-2018 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include + +void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); + +constexpr inline int iceildiv(const int a, const int b) +{ + return (a + b - 1) / b; +} + +template +inline T roundup(const T a, const T b) +{ + return b * iceildiv(a, b); +} + +template +struct TypeBounds +{ + static constexpr T lower() noexcept { return std::numeric_limits::has_infinity + ? -std::numeric_limits::infinity() + : std::numeric_limits::lowest(); }; + static constexpr T upper() noexcept { return std::numeric_limits::has_infinity + ? std::numeric_limits::infinity() + : std::numeric_limits::max(); }; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template<> +struct TypeBounds<__fp16> +{ + static constexpr __fp16 lower() noexcept { return -std::numeric_limits::infinity(); }; + static constexpr __fp16 upper() noexcept { return std::numeric_limits::infinity(); } +}; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp new file mode 100644 index 0000000000..70d6689731 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2018-2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include +#include "activation.hpp" +#include "padding.hpp" + +namespace depthwise +{ + +namespace nck = neon_convolution_kernels; + +class IDepthwiseConvolution +{ + public: + virtual ~IDepthwiseConvolution() = default; + + virtual int output_size( + int dim_size, + unsigned int padding_before, + unsigned int padding_after + ) const = 0; + + /* Set input tensor and stride. */ + virtual void set_input(const void *inptr) = 0; + virtual void set_input(const void *inptr, int column_stride) = 0; + virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0; + virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0; + + /* Set output tensor and stride. 
*/ + virtual void set_output(void *outptr) = 0; + virtual void set_output(void *outptr, int column_stride) = 0; + virtual void set_output(void *outptr, int row_stride, int column_stride) = 0; + virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0; + + /* Weights and biases are re-ordered to improve memory access patterns. Use + * these methods to determine the size of the re-pack buffer and to set the + * address (and implicitly reorder the weights and biases into) the buffer. + */ + virtual size_t get_packed_params_size(void) const = 0; + virtual void set_packed_params_buffer(void *) = 0; + + virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0; + virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0; + virtual void pack_params( + void *buffer, + const void* weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const = 0; + + /* Working space is used to pad tensors on the fly. Before running any + * inference check the amount of space required, allocate and provide a + * pointer to the convolution engine. 
+ */ + virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0; + virtual void set_working_space(void *) = 0; + + virtual unsigned int get_window(void) const = 0; + virtual void run( + unsigned int start, + unsigned int stop, + unsigned int threadid=0 + ) = 0; +}; + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols, + typename TIn, typename TBias, typename TOut, + typename Derived +> +class DepthwiseConvolutionBase : public IDepthwiseConvolution +{ + public: + // Information about the specific convolution instance + using InputType = TIn; + using BiasType = TBias; + using OutputType = TOut; + static constexpr int output_tile_rows = OutputTileRows; + static constexpr int output_tile_cols = OutputTileCols; + static constexpr int kernel_rows = KernelRows; + static constexpr int kernel_cols = KernelCols; + static constexpr int stride_rows = StrideRows; + static constexpr int stride_cols = StrideCols; + static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows; + static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols; + + /** Create a new depthwise convolution engine. + * + * @param[in] n_batches Number of batches tensors. + * @param[in] n_input_rows Number of rows in input tensor. + * @param[in] n_input_cols Number of columns in input tensor. + * @param[in] n_channels Number of channels in input and output tensors. + */ + DepthwiseConvolutionBase( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + /** Create a new depthwise convolution engine. + * + * @param[in] n_batches Number of batches tensors. + * @param[in] n_input_rows Number of rows in input tensor. 
+ * @param[in] n_input_cols Number of columns in input tensor. + * @param[in] n_channels Number of channels in input and output tensors. + */ + DepthwiseConvolutionBase( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + // Cannot copy or move a DepthwiseConvolution. + DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete; + DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete; + + /* Set input tensor and stride. */ + void set_input(const void *inptr) override; + void set_input(const void *inptr, int column_stride) override; + void set_input(const void *inptr, int row_stride, int column_stride) override; + void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; + + /* Set output tensor and stride. */ + void set_output(void *outptr) override; + void set_output(void *outptr, int column_stride) override; + void set_output(void *outptr, int row_stride, int column_stride) override; + void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; + + /** Get the number of output rows/columns. + * + * @param[in] dim_size Number of elements in the dimension (rows/columns) + * @param[in] same_padding True if the padding is SAME, otherwise false. + */ + static int get_output_size( + int dim_size, unsigned int padding_before, unsigned int padding_after + ); + + int output_size( + int dim_size, unsigned int padding_before, unsigned int padding_after + ) const override; + + /* Determine how much memory is required to store the packed weights and + * biases. + */ + size_t get_packed_params_size(void) const override; + + /* Set the buffer for the packed weights and biases, and perform the + * packing. 
+ */ + void set_packed_params_buffer(void *buffer) override; + + void pack_params(const void *weights, const void *biases=nullptr) const override; + + void pack_params( + void *buffer, + const void *weights, + const void *biases=nullptr + ) const override; + + void pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const override; + + /** Query the amount of working space required. + * @param[in] The largest number of threads which will be used to execute + * the kernel. + */ + size_t get_working_space_size(unsigned int n_threads=1) const override; + + /** Set the working space buffer. + */ + void set_working_space(void *buffer) override; + + /** Get the window of work to be performed by an instance of the operator. + */ + unsigned int get_window(void) const override; + + /** Perform a portion of the work associated with the operator. + * + * Will perform the window of work described by $[start, stop)$. + * + * @param[in] start Start of the window of work to perform. + * @param[in] stop End of the work to perform. + * @param[in] ID of the thread performing the work. + */ + void run( + unsigned int start, + unsigned int stop, + unsigned int threadid=0 + ) override; + + protected: + /** Get the value to use to pad the tensor. + */ + TIn _input_padding_value(void) const; + + /** Implementation of the parameter packing. + */ + void _pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const; + + /** Process a tile-row of the tensors. 
+ */ + void process_tile_row( + unsigned int threadid, + int n_channels, + const void* packed_params, + const InputType* inptr, + OutputType* outptr, + int row_pad_in_top, + int row_pad_in_left, + int row_pad_in_bottom, + int row_pad_out_bottom, + int n_tiles, + int n_input_cols, + int n_output_cols + ); + + /** Process a single tile of the tensor. + * + * This method will apply input/output padding (if required) and call the + * depthwise tile implementation. + */ + void process_tile( + unsigned int threadid, + int n_channels, + const void* packed_params, + const InputType* inptr, + OutputType* outptr, + int pad_in_top, + int pad_in_left, + int pad_in_bottom, + int pad_in_right, + int pad_out_bottom, + int pad_out_right + ); + + /** Perform depthwise convolution on a single tile. + */ + template + void execute_tile( + int n_channels, + const void* packed_params, + const InputType* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + OutputType* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const InputType* inptrs[inner_tile_rows][inner_tile_cols], + OutputType* outptrs[output_tile_rows][output_tile_cols] + ); + + int n_channels(void) const; + + private: + // Member variables of instances of a convolution engine. 
+ const InputType* _input; + OutputType* _output; + void* _packed_parameters; + void* _working_space; // Per-thread working space + const int _n_batches, _n_input_rows, _n_input_cols, _n_channels, + _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols; + const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right; + const nck::ActivationFunction _activation; + + // Stride information for a convolution instance + int _input_col_stride, _input_row_stride, _input_batch_stride; + int _output_col_stride, _output_row_stride, _output_batch_stride; + + // Methods for getting access to working space + size_t _get_input_working_space_size(void) const; + size_t _get_output_working_space_size(void) const; + + void *_get_input_working_space(unsigned int threadid) const; + void *_get_output_working_space(unsigned int threadid) const; +}; + + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols, + typename TIn, typename TBias, typename TOut +> +class DepthwiseConvolution : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut + > +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut + > >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + using Base::DepthwiseConvolutionBase; + + protected: + template + void execute_tile( + int n_channels, + const void* packed_params, + const TIn* inptr, + unsigned int in_row_stride, + unsigned 
int in_col_stride, + TOut* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); +}; + + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float +> : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float + > +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float + > >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + protected: + template + void execute_tile( + int n_channels, + const void* packed_params, + 
const float* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + float* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + float* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t +> : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t + > +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t + > >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + 
unsigned int padding_bottom, + unsigned int padding_right + ); + + protected: + template + void execute_tile( + int n_channels, + const void* packed_params, + const float16_t* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + float16_t* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); +}; +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +} // namespace depthwise diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp new file mode 100644 index 0000000000..1bae815613 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include +#include +#include + +#include "depthwise.hpp" + +namespace depthwise +{ + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols, + typename TIn, typename TBias, typename TOut +> +class DilatedDepthwiseConvolution : public IDepthwiseConvolution +{ + public: + /** Create a new dilated depthwise convolution engine. + */ + DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + /** Create a new dilated depthwise convolution engine. + */ + DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + // Cannot copy or move a DilatedDepthwiseConvolution. + DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete; + DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete; + + /* Set input tensor and stride. */ + void set_input(const void *inptr) override; + void set_input(const void *inptr, int column_stride) override; + void set_input(const void *inptr, int row_stride, int column_stride) override; + void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; + + /* Set output tensor and stride. 
*/ + void set_output(void *outptr) override; + void set_output(void *outptr, int column_stride) override; + void set_output(void *outptr, int row_stride, int column_stride) override; + void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; + + static int get_output_size( + int dim_size, + unsigned int padding_before, + unsigned int padding_after, + int dilation_factor + ); + + int output_size( + int dim_size, unsigned int padding_before, unsigned int padding_after + ) const override; + + /* Weights and biases are re-ordered to improve memory access patterns. Use + * these methods to determine the size of the re-pack buffer and to set the + * address (and implicitly reorder the weights and biases into) the buffer. + */ + size_t get_packed_params_size(void) const override; + void set_packed_params_buffer(void *) override; + + void pack_params(const void *weights, const void *biases=nullptr) const override; + void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override; + void pack_params( + void *buffer, + const void* weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const override; + + /* Working space is used to pad tensors on the fly. Before running any + * inference check the amount of space required, allocate and provide a + * pointer to the convolution engine. 
+ */ + size_t get_working_space_size(unsigned int nthreads=1) const override; + void set_working_space(void *) override; + + unsigned int get_window(void) const override; + void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; + + protected: + /** Protected constructor which also accepts a function to construct a new + * subconvolution + */ + DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right, + std::function subconvfn + ); + + const int _dilation_factor; + const int _n_input_rows, _n_input_cols, _n_channels; + const int _padding_top, _padding_left; + const int _n_output_rows, _n_output_cols; + + /* Dilated depthwise convolution is performed through repeated calls to + * non-dilated convolutions. If the dilation factor is $n$, then we perform + * $(n + 1)^2$ depthwise convolutions. + */ + using BaseDepthwise = DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut + >; + std::deque>> _convs; +}; + +} // namespace depthwise diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp new file mode 100644 index 0000000000..4343f6ad45 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2018-2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once +#include "depthwise.hpp" +#include "qasymm8.hpp" +#include "qsymm8.hpp" +#pragma once + +using namespace neon_convolution_kernels; +using namespace qasymm8; + +inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b) +{ + return vqrdmulhq_s32(a, b); +} + +inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b) +{ + return vqrdmulhq_n_s32(a, b); +} + +inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b) +{ + return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0); +} + +inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift) +{ + const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); + const int32x4_t fixed = vqaddq_s32(x, fixup); + return vrshlq_s32(fixed, shift); +} + +inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent) +{ + const int32x4_t shift = vdupq_n_s32(-exponent); + const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); + const int32x4_t fixed = vqaddq_s32(x, fixup); + return vrshlq_s32(fixed, shift); +} + +inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent) +{ + const int32x2_t shift = vdup_n_s32(-exponent); + const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31); + const int32x2_t fixed = vqadd_s32(x, fixup); + return vrshl_s32(fixed, shift); +} + +inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent) +{ + const int32x2_t xs = vdup_n_s32(x); + return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0); +} + +namespace depthwise +{ + +namespace nck = neon_convolution_kernels; + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class QAsymm8DepthwiseConvolution : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, 
int32_t, uint8_t, + QAsymm8DepthwiseConvolution +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, int32_t, uint8_t, + QAsymm8DepthwiseConvolution + >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + const qasymm8::QAsymm8RescaleParams& rescale_parameters, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& 
input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + const qasymm8::QAsymm8RescaleParams& rescale_parameters, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + protected: + uint8_t _input_padding_value(void) const; + + void _pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const; + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + uint8_t* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); + + private: + // Quantization parameters + const qasymm8::QAsymm8Params _weights_quant, _inputs_quant, _output_quant; + const qasymm8::QAsymm8RescaleParams rescale_parameters; +}; + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class QSymm8HybridPerChannelDepthwiseConvolution : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, int32_t, uint8_t, + QSymm8HybridPerChannelDepthwiseConvolution +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, int32_t, uint8_t, + QSymm8HybridPerChannelDepthwiseConvolution + >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + QSymm8HybridPerChannelDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int 
n_channels, + nck::ActivationFunction activation, + const qsymm8::QSymm8PerChannelParams& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QSymm8HybridPerChannelDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + const qsymm8::QSymm8PerChannelParams& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + const qsymm8::QSymm8PerChannelRescaleParams& rescale_parameters, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + size_t get_packed_params_size(void) const override + { + return this->n_channels() * (sizeof(int8_t)*KernelRows*KernelCols + 3*sizeof(int32_t)); + + } + + protected: + uint8_t _input_padding_value(void) const; + + void _pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const; + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + uint8_t* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); + + private: + // Quantization parameters + const qsymm8::QSymm8PerChannelParams _weights_quant; + const qasymm8::QAsymm8Params _input_quant, _output_quant; + const qsymm8::QSymm8PerChannelRescaleParams _rescale_parameters; +}; + +} // namespace depthwise diff --git 
a/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp new file mode 100644 index 0000000000..a11b0981c9 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#include "depthwise_dilated.hpp" +#include "depthwise_quantized.hpp" + +namespace depthwise { + +template +class QAsymm8DilatedDepthwiseConvolution + : public DilatedDepthwiseConvolution< + OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, + StrideCols, uint8_t, int32_t, uint8_t> { +public: + /** Create a new dilated depthwise convolution engine. 
+ */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); + + /** Create a new dilated depthwise convolution engine. + */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); + + /** Create a new dilated depthwise convolution engine. + */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + const qasymm8::QAsymm8RescaleParams &rescale_parameters, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); + + /** Create a new dilated depthwise convolution engine. 
+ */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + const qasymm8::QAsymm8RescaleParams& rescale_parameters, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); +}; + +} // namespace depthwise diff --git a/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h index d7ee70a1cd..59f5c6c6b3 100644 --- a/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h +++ b/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h @@ -25,10 +25,10 @@ #ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H #define ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/utils/misc/Requires.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "support/Requires.h" #include diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp index c1a1c2ecf0..414c128a27 100644 --- a/src/core/TensorInfo.cpp +++ b/src/core/TensorInfo.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/Utils.h" #include "support/MemorySupport.h" using namespace arm_compute; diff --git a/src/core/helpers/AutoConfiguration.h b/src/core/helpers/AutoConfiguration.h new file mode 100644 index 0000000000..6880a6cb66 --- /dev/null +++ b/src/core/helpers/AutoConfiguration.h @@ -0,0 +1,176 @@ +/* +* Copyright (c) 2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_AUTOCONFIGURATION_H +#define SRC_CORE_HELPERS_AUTOCONFIGURATION_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * @param[in] num_channels New number of channels. 
+ * @param[in] data_type New data type + * @param[in] quantization_info (Optional) New quantization info + * + * @return True if the tensor info has been initialized + */ +inline bool auto_init_if_empty(ITensorInfo &info, + const TensorShape &shape, + int num_channels, DataType data_type, + QuantizationInfo quantization_info = QuantizationInfo()) +{ + if(info.tensor_shape().total_size() == 0) + { + info.set_data_type(data_type); + info.set_num_channels(num_channels); + info.set_tensor_shape(shape); + info.set_quantization_info(quantization_info); + return true; + } + + return false; +} + +/** Auto initialize the tensor info using another tensor info: data type, number of channels, shape, quantization info and data layout are copied if the sink's tensor shape has a total size of 0. + * + * @param[in,out] info_sink Tensor info used to check and assign + * @param[in] info_source Tensor info used to assign + * + * @return True if the tensor info has been initialized + */ +inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source) +{ + if(info_sink.tensor_shape().total_size() == 0) + { + info_sink.set_data_type(info_source.data_type()); + info_sink.set_num_channels(info_source.num_channels()); + info_sink.set_tensor_shape(info_source.tensor_shape()); + info_sink.set_quantization_info(info_source.quantization_info()); + info_sink.set_data_layout(info_source.data_layout()); + return true; + } + + return false; +} + +/** Set the shape to the specified value if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * + * @return True if the shape has been changed. + */ +inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) +{ + if(info.tensor_shape().total_size() == 0) + { + info.set_tensor_shape(shape); + return true; + } + + return false; +} + +/** Set the format, data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] format New format.
+ * + * @return True if the format has been changed. + */ +inline bool set_format_if_unknown(ITensorInfo &info, Format format) +{ + if(info.data_type() == DataType::UNKNOWN) + { + info.set_format(format); + return true; + } + + return false; +} + +/** Set the data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_type New data type. + * + * @return True if the data type has been changed. + */ +inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) +{ + if(info.data_type() == DataType::UNKNOWN) + { + info.set_data_type(data_type); + return true; + } + + return false; +} + +/** Set the data layout to the specified value if + * the current data layout is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_layout New data layout. + * + * @return True if the data layout has been changed. + */ +inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout) +{ + if(info.data_layout() == DataLayout::UNKNOWN) + { + info.set_data_layout(data_layout); + return true; + } + + return false; +} + +/** Set the quantization info to the specified value if + * the current quantization info is empty and the data type is an asymmetric quantized type + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] quantization_info Quantization info + * + * @return True if the quantization info has been changed.
+ */ +inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) +{ + if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) + { + info.set_quantization_info(quantization_info); + return true; + } + + return false; +} +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_AUTOCONFIGURATION_H */ diff --git a/src/core/helpers/NormalizationHelpers.h b/src/core/helpers/NormalizationHelpers.h new file mode 100644 index 0000000000..d94d5e3602 --- /dev/null +++ b/src/core/helpers/NormalizationHelpers.h @@ -0,0 +1,47 @@ +/* +* Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H +#define SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** Calculate the normalization dimension index for a given normalization type + * + * @param[in] layout Data layout of the input and output tensor + * @param[in] info Normalization info + * + * @return Normalization dimension index + */ +inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info) +{ + const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH); + const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL); + + return info.is_in_map() ? width_idx : channel_idx; +} +} // namespace arm_compute +#endif /* SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H */ diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h new file mode 100644 index 0000000000..827bbef4cd --- /dev/null +++ b/src/core/helpers/ScaleHelpers.h @@ -0,0 +1,331 @@ +/* +* Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_SCALEHELPERS_H +#define SRC_CORE_HELPERS_SCALEHELPERS_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/QuantizationInfo.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +namespace scale_helpers +{ +/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +template +inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const T a00 = *pixel_ptr; + const T a01 = *(pixel_ptr + 1); + const T a10 = *(pixel_ptr + stride); + const T a11 = *(pixel_ptr + stride + 1); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + + return static_cast(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); +} + +/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * @param[in] iq_info Input QuantizationInfo + * @param[in] oq_info Output QuantizationInfo + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, + UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info); + const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info); + const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info); + const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; + return static_cast(quantize_qasymm8(res, oq_info)); +} + +/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8_SIGNED and in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * @param[in] iq_info Input QuantizationInfo + * @param[in] oq_info Output QuantizationInfo + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy, + UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info); + const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info); + const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info); + const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; + return static_cast(quantize_qasymm8_signed(res, oq_info)); +} + +/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. + * + * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom pixel value + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dy must be in the range [0, 1.0] + * + * @return The linear interpolated pixel value + */ +template +inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dy1 = 1.0f - dy; + + const T a00 = *pixel_ptr; + const T a10 = *(pixel_ptr + stride); + + const float w1 = dy1; + const float w3 = dy; + + return static_cast(a00 * w1 + a10 * w3); +} + +/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. + * + * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * + * @note dx must be in the range [0, 1.0] + * + * @return The linear interpolated pixel value + */ +template +inline T delta_linear_c1_x(const T *pixel_ptr, float dx) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const T a00 = *pixel_ptr; + const T a01 = *(pixel_ptr + 1); + + const float dx1 = 1.0f - dx; + + const float w1 = dx1; + const float w2 = dx; + + return static_cast(a00 * w1 + a01 * w2); +} + +/** Return the pixel at (x,y) using bilinear interpolation. + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input. + * @param[in] stride Stride of the image in elements of T (it is added to a `const T *`, so it equals the stride in bytes only when sizeof(T) is 1) + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation.
+ */ +template +inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + const int32_t xi = std::floor(x); + const int32_t yi = std::floor(y); + + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy); +} + +/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image. + * @param[in] stride Stride of the image in elements of T (it is added to a `const T *`, so it equals the stride in bytes only when sizeof(T) is 1) + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation, after clamping x to [-1, width] and y to [-1, height]. NOTE(review): the return type is uint8_t for every T - confirm the narrowing is intended for wider element types. + */ +template +inline uint8_t +pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + x = std::max(-1.f, std::min(x, static_cast(width))); + y = std::max(-1.f, std::min(y, static_cast(height))); + + const float xi = std::floor(x); + const float yi = std::floor(y); + + const float dx = x - xi; + const float dy = y - yi; + + if(dx == 0.0f) + { + if(dy == 0.0f) + { + return static_cast(first_pixel_ptr[static_cast(xi) + static_cast(yi) * stride]); + } + return delta_linear_c1_y(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, + stride, dy); + } + if(dy == 0.0f) + { + return delta_linear_c1_x(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, + dx); + } + return delta_bilinear_c1(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, stride, + dx, dy); +} + +/** Return the pixel at (x,y) using area interpolation by clamping when out of borders.
The image must be single channel U8 + * + * @note The interpolation area depends on the width and height ratio of the input and output images + * @note Currently the average of the contributing pixels is calculated + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] wr Width ratio between the input image width and the output image width. + * @param[in] hr Height ratio between the input image height and the output image height. + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using area interpolation. + */ +inline uint8_t +pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, + float hr, int x, int y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + // Calculate sampling position + float in_x = (x + 0.5f) * wr - 0.5f; + float in_y = (y + 0.5f) * hr - 0.5f; + + // Get bounding box offsets + int x_from = std::floor(x * wr - 0.5f - in_x); + int y_from = std::floor(y * hr - 0.5f - in_y); + int x_to = std::ceil((x + 1) * wr - 0.5f - in_x); + int y_to = std::ceil((y + 1) * hr - 0.5f - in_y); + + // Clamp position to borders + in_x = std::max(-1.f, std::min(in_x, static_cast(width))); + in_y = std::max(-1.f, std::min(in_y, static_cast(height))); + + // Clamp bounding box offsets to borders + x_from = ((in_x + x_from) < -1) ? -1 : x_from; + y_from = ((in_y + y_from) < -1) ? -1 : y_from; + x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to; + y_to = ((in_y + y_to) > height) ?
(height - in_y) : y_to; + + // Get pixel index + const int xi = std::floor(in_x); + const int yi = std::floor(in_y); + + // Bounding box elements in each dimension + const int x_elements = (x_to - x_from + 1); + const int y_elements = (y_to - y_from + 1); + ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0); + + // Sum pixels in area + int sum = 0; + for(int j = yi + y_from, je = yi + y_to; j <= je; ++j) + { + const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; + sum = std::accumulate(ptr, ptr + x_elements, sum); + } + + // Return average + return sum / (x_elements * y_elements); +} +} // namespace scale_helpers +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_SCALEHELPERS_H */ diff --git a/src/core/helpers/SoftmaxHelpers.cpp b/src/core/helpers/SoftmaxHelpers.cpp new file mode 100644 index 0000000000..71b971af31 --- /dev/null +++ b/src/core/helpers/SoftmaxHelpers.cpp @@ -0,0 +1,45 @@ +/* +* Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/helpers/SoftmaxHelpers.h" + +namespace arm_compute +{ +namespace softmax_helpers +{ +PermutationVector get_permutation_vector_from_softmax_axis(size_t axis) +{ + switch(axis) + { + case 1: + return PermutationVector(1U, 0U, 2U, 3U); + case 2: + return PermutationVector(2U, 1U, 0U, 3U); + case 3: + return PermutationVector(3U, 1U, 2U, 0U); + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } +} +} // namespace softmax_helpers +} // namespace arm_compute diff --git a/src/core/helpers/SoftmaxHelpers.h b/src/core/helpers/SoftmaxHelpers.h new file mode 100644 index 0000000000..de5490a14d --- /dev/null +++ b/src/core/helpers/SoftmaxHelpers.h @@ -0,0 +1,50 @@ +/* +* Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_SOFTMAXHELPERS_H +#define SRC_CORE_HELPERS_SOFTMAXHELPERS_H + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace softmax_helpers +{ +/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front + * + * @note This function assumes a tensor rank <= 4 + * + * Axis selects the dimension on which softmax is performed. + * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5. + * Internally, the softmax kernel is always performed on the first dimension (front dimension); therefore a permutation is + * required to move the dimension specified by @p axis to the first dimension. + * + * @param[in] axis Axis on which to perform softmax. Supported: 1, 2, 3. Axis 0 needs no permutation and must not be passed: any other value reaches ARM_COMPUTE_ERROR("Axis not supported") + * + * @return the permutation vector + */ +PermutationVector get_permutation_vector_from_softmax_axis(size_t axis); +} // namespace softmax_helpers +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_SOFTMAXHELPERS_H */ diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h new file mode 100644 index 0000000000..3c3b2b93f9 --- /dev/null +++ b/src/core/helpers/Utils.h @@ -0,0 +1,97 @@ +/* +* Copyright (c) 2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_UTILS_H +#define SRC_CORE_HELPERS_UTILS_H + +#include "arm_compute/core/ITensorInfo.h" + +namespace arm_compute +{ +/** Create a strides object based on the provided strides and the tensor dimensions. + * + * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides. + * @param[in] stride_x Stride to be used in X dimension (in bytes). + * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes). + * + * @return Strides object based on the specified strides. Missing strides are + * calculated based on the tensor shape and the strides of lower dimensions. + */ +template +inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... 
fixed_strides) +{ + const TensorShape &shape = info.tensor_shape(); + + // Create strides object + Strides strides(stride_x, fixed_strides...); + + for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) + { + strides.set(i, shape[i - 1] * strides[i - 1]); + } + + return strides; +} + +/** Create a strides object based on the tensor dimensions. + * + * @param[in] info Tensor info object used to compute the strides. + * + * @return Strides object based on element size and tensor shape. + */ +template +inline Strides compute_strides(const ITensorInfo &info) +{ + return compute_strides(info, info.element_size()); +} + +/** Given an integer value, this function returns the smallest power of two that is greater than or equal to it + * + * @param[in] x Input value. 0 wraps around in the initial decrement and yields 0; the shifts cover 32 bits, so with a 32-bit unsigned int values above 2^31 also wrap to 0 + * + * @return the smallest power of two >= @p x (@p x itself when it is already a power of two) + */ +inline unsigned int get_next_power_two(unsigned int x) +{ + // Decrement by 1 so that an exact power of two maps to itself + x--; + + // Shift right by 1 + x |= x >> 1u; + // Shift right by 2 + x |= x >> 2u; + // Shift right by 4 + x |= x >> 4u; + // Shift right by 8 + x |= x >> 8u; + // Shift right by 16 + x |= x >> 16u; + + // Increment by 1 + x++; + + return x; +} +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_UTILS_H */ diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp new file mode 100644 index 0000000000..ba10eb9775 --- /dev/null +++ b/src/core/helpers/WindowHelpers.cpp @@ -0,0 +1,183 @@ +/* +* Copyright (c) 2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/core/helpers/WindowHelpers.h" + +namespace arm_compute +{ +Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if(!skip_border) + { + border_size = BorderSize(0); + } + + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast(shape[0]) - static_cast(border_size.left) - static_cast(border_size.right)), steps[0]), + steps[0])); + + size_t n = 1; + + if(anchor.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Skip the border above the image + anchor[1] + border_size.top, + // Skip the border below the image + anchor[1] + border_size.top + ceil_to_multiple(std::max(0, static_cast(shape[1]) - static_cast(border_size.top) - static_cast(border_size.bottom)), steps[1]), + steps[1])); + + ++n; + } + + if(anchor.num_dimensions() > 2) + { + window.set(2, Window::Dimension(anchor[2], std::max(1, shape[2]), steps[2])); + + ++n; + } + + for(; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max(1, shape[n]))); + } + + for(; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size) +{ + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // move the anchor to the start from the border + anchor[0] - border_size.left, + // move the anchor to include the right end border + // Make sure the window width is a multiple of the 
step size + anchor[0] - border_size.left + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), + steps[0])); + + size_t n = 1; + + if(anchor.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Include the border above the image + anchor[1] - border_size.top, + // Include the border below the image + anchor[1] - border_size.top + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]), + steps[1])); + + ++n; + } + + if(anchor.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[n]), steps[2])); + + ++n; + } + + for(; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); + } + + for(; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if(skip_border) + { + border_size.top = 0; + border_size.bottom = 0; + } + else + { + border_size.left = 0; + border_size.right = 0; + } + + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]), + steps[0])); + + size_t n = 1; + + if(anchor.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Include the border above the image (top is zero when skip_border is set) + anchor[1] - border_size.top, + // Include the border below the image (bottom is zero when skip_border is set) + anchor[1] + shape[1] + border_size.bottom, + 1)); + + ++n; + } + + for(; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1,
shape[n]))); + } + + for(; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} +} // namespace arm_compute diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h new file mode 100644 index 0000000000..9bc2135b6d --- /dev/null +++ b/src/core/helpers/WindowHelpers.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_WINDOWHELPERS_H +#define SRC_CORE_HELPERS_WINDOWHELPERS_H + +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Steps.h" +#include "arm_compute/core/Window.h" + +namespace arm_compute +{ +/** Update window and padding size for each of the access patterns. + * + * First the window size is reduced based on all access patterns that are not + * allowed to modify the padding of the underlying tensor.
Then the padding of + * the remaining tensors is increased to match the window. + * + * @param[in, out] win Window that is used by the kernel. + * @param[in] patterns Access patterns used to calculate the final window and padding. + * + * @return True if the window has been changed. Changes to the padding do not + * influence the returned value. + */ +template <typename... Ts> +bool update_window_and_padding(Window &win, Ts &&... patterns) +{ + bool window_changed = false; + + utility::for_each([&](const IAccessWindow & w) + { + window_changed |= w.update_window_if_needed(win); + }, + patterns...); + + // Padding changes are not reported to the caller, so the result of each update is discarded + + utility::for_each([&](IAccessWindow & w) + { + w.update_padding_if_needed(win); + }, + patterns...); + + return window_changed; +} + +/** Intersect multiple valid regions. + * + * @param[in] regions Valid regions. + * + * @return Intersection of all regions. + */ +template <typename... Ts> +ValidRegion intersect_valid_regions(const Ts &... regions) +{ + auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion + { + ValidRegion region; + + for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) + { + region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); + } + + for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) + { + region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); + } + + return region; + }; + + return utility::foldl(intersect, regions...); +} + +#ifndef DOXYGEN_SKIP_THIS +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size.
+ * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] info Tensor info object whose valid region defines the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) +{ + return calculate_max_window(info.valid_region(), steps, skip_border, border_size); +} + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true the left and right borders are excluded from the window and the top and bottom borders are ignored. If false the top and bottom borders are included in the window and the left and right borders are ignored. + * @param[in] border_size (Optional) Border size. Which sides are applied depends on @p skip_border. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] info Tensor info object whose valid region defines the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true the left and right borders are excluded from the window and the top and bottom borders are ignored. If false the top and bottom borders are included in the window and the left and right borders are ignored. + * @param[in] border_size (Optional) Border size. Which sides are applied depends on @p skip_border. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) +{ + return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size); +} + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also include the border. + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also include the border. + * + * @param[in] info Tensor info object whose valid region defines the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on.
+ */ +inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize()) +{ + return calculate_max_enlarged_window(info.valid_region(), steps, border_size); +} +#endif /* DOXYGEN_SKIP_THIS */ +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_WINDOWHELPERS_H */ diff --git a/src/core/utils/helpers/bit_ops.h b/src/core/utils/helpers/bit_ops.h new file mode 100644 index 0000000000..ef60214c9f --- /dev/null +++ b/src/core/utils/helpers/bit_ops.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H +#define ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H + +#include "support/Requires.h" + +#include <type_traits> + +namespace arm_compute +{ +namespace helpers +{ +namespace bit_ops +{ +/** Checks if the idx-th bit is set in an integral type + * + * @param[in] v Integral input + * @param[in] idx Index of the bit to check, counted from the least significant bit + * + * @return True if the idx-th bit is set else false + */ +template <typename T, REQUIRES_TA(std::is_integral<T>::value)> +bool is_bit_set(T v, unsigned int idx) +{ + // Build the mask in T so that indices beyond the width of int work for wider integral types + return (v & (static_cast<T>(1) << idx)) != 0; +} +} // namespace bit_ops +} // namespace helpers +} // namespace arm_compute +#endif /* ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H */ diff --git a/src/core/utils/helpers/fft.cpp b/src/core/utils/helpers/fft.cpp index 4c2f8fa494..64633c643d 100644 --- a/src/core/utils/helpers/fft.cpp +++ b/src/core/utils/helpers/fft.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/utils/helpers/fft.h" +#include "src/core/utils/helpers/fft.h" #include diff --git a/src/core/utils/helpers/fft.h b/src/core/utils/helpers/fft.h new file mode 100644 index 0000000000..f7b99dd7b8 --- /dev/null +++ b/src/core/utils/helpers/fft.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_UTILS_HELPERS_FFT_H +#define ARM_COMPUTE_UTILS_HELPERS_FFT_H + +#include <set> +#include <vector> + +namespace arm_compute +{ +namespace helpers +{ +namespace fft +{ +/** Decompose a given 1D input size using the provided supported factors. + * + * @param[in] N Input size to be decomposed. + * @param[in] supported_factors Supported factors that can be used for decomposition. + * + * @return A vector with the stages of the decomposition. Will be empty if decomposition failed. + */ +std::vector<unsigned int> decompose_stages(unsigned int N, const std::set<unsigned int> &supported_factors); +/** Calculate digit reverse index vector given fft size and the decomposed stages + * + * @param[in] N Input size to calculate digit reverse for + * @param[in] fft_stages A vector with the FFT decomposed stages + * + * @return A vector with the digit reverse indices. Will be empty if it failed.
+ */ +std::vector<unsigned int> digit_reverse_indices(unsigned int N, const std::vector<unsigned int> &fft_stages); +} // namespace fft +} // namespace helpers +} // namespace arm_compute +#endif /* ARM_COMPUTE_UTILS_HELPERS_FFT_H */ diff --git a/src/core/utils/helpers/float_ops.h b/src/core/utils/helpers/float_ops.h new file mode 100644 index 0000000000..a475a23b59 --- /dev/null +++ b/src/core/utils/helpers/float_ops.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H +#define ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H + +#include <cmath> +#include <cstdint> +#include <cstdlib> + +namespace arm_compute +{ +namespace helpers +{ +namespace float_ops +{ +union RawFloat +{ + /** Constructor + * + * @param[in] val Floating-point value + */ + explicit RawFloat(float val) + : f32(val) + { + } + /** Extract sign of floating point number + * + * @return Sign of floating point number + */ + int32_t sign() const + { + return i32 >> 31; + } + /** Extract exponent of floating point number + * + * @return Exponent of floating point number + */ + int32_t exponent() const + { + return (i32 >> 23) & 0xFF; + } + /** Extract mantissa of floating point number + * + * @return Mantissa of floating point number + */ + int32_t mantissa() const + { + return i32 & 0x007FFFFF; + } + + int32_t i32; + float f32; +}; + +/** Checks if two floating point numbers are equal given an allowed number of ULPs + * + * @param[in] a First number to compare + * @param[in] b Second number to compare + * @param[in] max_allowed_ulps (Optional) Number of allowed ULPs + * + * @return True if number is close else false + */ +inline bool is_equal_ulps(float a, float b, int max_allowed_ulps = 0) +{ + RawFloat ra(a); + RawFloat rb(b); + + // Map the sign-magnitude bit patterns onto a monotonic 64-bit scale so the subtraction cannot overflow for operands of opposite sign + const int64_t ordered_a = (ra.i32 < 0) ? -static_cast<int64_t>(ra.i32 & 0x7FFFFFFF) : static_cast<int64_t>(ra.i32); + const int64_t ordered_b = (rb.i32 < 0) ? -static_cast<int64_t>(rb.i32 & 0x7FFFFFFF) : static_cast<int64_t>(rb.i32); + + // Check ULP distance + const int64_t ulps = std::abs(ordered_a - ordered_b); + return ulps <= max_allowed_ulps; +} + +/** Checks if the input floating point number is 1.0f checking if the difference is within a range defined with epsilon + * + * @param[in] a Input floating point number + * @param[in] epsilon (Optional) Epsilon used to define the error bounds + * + * @return True if number is close to 1.0f + */ +inline bool is_one(float a, float epsilon = 0.00001f) +{ + return std::abs(1.0f - a) <= epsilon; +} + +/** Checks if the input floating point number is 0.0f checking if the difference is within a range defined with epsilon + * + * @param[in] a Input floating point number + * @param[in] epsilon (Optional) Epsilon used to define the error bounds + * + * @return
True if number is close to 0.0f + */ +inline bool is_zero(float a, float epsilon = 0.00001f) +{ + return std::abs(0.0f - a) <= epsilon; +} +} // namespace float_ops +} // namespace helpers +} // namespace arm_compute +#endif /* ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H */ diff --git a/src/core/utils/helpers/tensor_info.h b/src/core/utils/helpers/tensor_info.h new file mode 100644 index 0000000000..9279532e2a --- /dev/null +++ b/src/core/utils/helpers/tensor_info.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H +#define ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H + +#include "arm_compute/core/ITensorInfo.h" + +#include <algorithm> +#include <array> +#include <utility> + +namespace arm_compute +{ +namespace helpers +{ +namespace tensor_info +{ +/** Checks if the quantization info of given tensors are different + * + * @param[in] tensor_info_1 Tensor info of the first tensor; its quantization info is the reference the others are compared against + * @param[in] tensor_info_2 Tensor info of the second tensor + * @param[in] tensor_infos Tensor infos of the rest tensors + * + * @return True if tensors have mismatching quantization info else false. + */ +template <typename... Ts> +inline bool tensors_have_different_quantization_info(const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +{ + const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info(); + + const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } }; + return std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) + { + return tensor_info->quantization_info() != first_quantization_info; + }); +} +} // namespace tensor_info +} // namespace helpers +} // namespace arm_compute +#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H */ diff --git a/src/core/utils/helpers/tensor_transform.cpp b/src/core/utils/helpers/tensor_transform.cpp index 84302ea19f..f2216995a9 100644 --- a/src/core/utils/helpers/tensor_transform.cpp +++ b/src/core/utils/helpers/tensor_transform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,7 @@ */ #include "arm_compute/core/utils/helpers/tensor_transform.h" -#include "arm_compute/core/utils/helpers/bit_ops.h" +#include "bit_ops.h" namespace arm_compute { diff --git a/src/graph/algorithms/TopologicalSort.cpp b/src/graph/algorithms/TopologicalSort.cpp index 3647e13e92..3a69352471 100644 --- a/src/graph/algorithms/TopologicalSort.cpp +++ b/src/graph/algorithms/TopologicalSort.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #include "arm_compute/graph/Graph.h" -#include "arm_compute/core/utils/misc/Iterable.h" +#include "support/Iterable.h" #include #include @@ -185,4 +185,4 @@ std::vector dfs(Graph &g) return dfs_order_vector; } } // namespace graph -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index 5f2f46f72a..cd732553be 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -23,12 +23,12 @@ */ #include "arm_compute/graph/backends/CL/CLFunctionFactory.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/backends/FunctionHelpers.h" #include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" +#include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp index 8b2ecaf20e..8c1fedd93f 100644 --- a/src/graph/backends/CL/CLNodeValidator.cpp +++ b/src/graph/backends/CL/CLNodeValidator.cpp @@ -26,9 +26,9 @@ #include "arm_compute/graph/backends/ValidateHelpers.h" #include "arm_compute/graph/nodes/Nodes.h" -#include "arm_compute/core/utils/misc/Cast.h" #include 
"arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" +#include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp index ada0d686ed..b97d25890a 100644 --- a/src/graph/backends/CL/CLSubTensorHandle.cpp +++ b/src/graph/backends/CL/CLSubTensorHandle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,7 @@ */ #include "arm_compute/graph/backends/CL/CLSubTensorHandle.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index 8ecb593e11..7d9d388ebe 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -23,11 +23,11 @@ */ #include "arm_compute/graph/backends/GLES/GCFunctionFactory.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/backends/FunctionHelpers.h" #include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" +#include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp index 159e51246a..13a93a2556 100644 --- a/src/graph/backends/GLES/GCNodeValidator.cpp +++ b/src/graph/backends/GLES/GCNodeValidator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,8 @@ #include "arm_compute/graph/backends/ValidateHelpers.h" #include "arm_compute/graph/nodes/Nodes.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" +#include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 61df45d8d0..95c6631830 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/graph/backends/NEON/NEFunctionFactory.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/Logger.h" @@ -33,6 +32,7 @@ #include "arm_compute/graph/nodes/Nodes.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" #include "arm_compute/runtime/NEON/NEFunctions.h" +#include "support/Cast.h" #include "support/ToolchainSupport.h" using namespace arm_compute::utils::cast; diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp index 19c96eab50..63e8ff910f 100644 --- a/src/graph/backends/NEON/NENodeValidator.cpp +++ b/src/graph/backends/NEON/NENodeValidator.cpp @@ -26,9 +26,9 @@ #include "arm_compute/graph/backends/ValidateHelpers.h" #include "arm_compute/graph/nodes/Nodes.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" #include "arm_compute/runtime/NEON/NEFunctions.h" +#include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp index c8fc3f1ae2..4393156e8a 100644 --- a/src/graph/backends/NEON/NETensorHandle.cpp +++ b/src/graph/backends/NEON/NETensorHandle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,8 +23,8 @@ */ #include "arm_compute/graph/backends/NEON/NETensorHandle.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp index fd16625780..b45f453f23 100644 --- a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp +++ b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp @@ -33,7 +33,7 @@ #include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" #include #include diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp index fa63f5625b..963b948432 100644 --- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp +++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,8 +30,8 @@ #include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/ConcatenateLayerNode.h" -#include "arm_compute/core/utils/misc/Cast.h" -#include "arm_compute/core/utils/misc/Iterable.h" +#include "support/Cast.h" +#include "support/Iterable.h" namespace arm_compute { diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp index e3d3812c1d..b7c551ce8b 100644 --- a/src/graph/mutators/GroupedConvolutionMutator.cpp +++ b/src/graph/mutators/GroupedConvolutionMutator.cpp @@ -30,7 +30,7 @@ #include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/Nodes.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" #include "support/StringSupport.h" diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp index 48bb9f7fc0..09a3cf50c0 100644 --- a/src/graph/mutators/NodeExecutionMethodMutator.cpp +++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,7 +29,7 @@ #include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/Nodes.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index 2a80825b36..1d47668cf2 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -30,7 +30,7 @@ #include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationNode.h" #include "arm_compute/graph/nodes/Nodes.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" #include @@ -300,11 +300,12 @@ IGraphMutator::MutationType NodeFusionMutator::type() const void NodeFusionMutator::mutate(Graph &g) { // Supported activations when fusing - const std::set supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU, - Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU, - Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU, - Activation::RELU, Activation::SOFT_RELU, Activation::SQRT, - Activation::SQUARE, Activation::TANH }; + const std::set supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU, + Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU, + Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU, + Activation::RELU, Activation::SOFT_RELU, Activation::SQRT, + Activation::SQUARE, Activation::TANH + }; // Preconditions auto empty_prec = [](INode &) diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp index 359bba47ef..2c28a1a2d1 100644 --- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp +++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp @@ -30,8 +30,8 @@ #include "arm_compute/graph/backends/BackendRegistry.h" #include 
"arm_compute/graph/nodes/SplitLayerNode.h" -#include "arm_compute/core/utils/misc/Cast.h" -#include "arm_compute/core/utils/misc/Iterable.h" +#include "support/Cast.h" +#include "support/Iterable.h" namespace arm_compute { diff --git a/src/graph/mutators/SyntheticDataTypeMutator.cpp b/src/graph/mutators/SyntheticDataTypeMutator.cpp index dbbebdfb2b..532c0e821b 100644 --- a/src/graph/mutators/SyntheticDataTypeMutator.cpp +++ b/src/graph/mutators/SyntheticDataTypeMutator.cpp @@ -29,7 +29,7 @@ #include "arm_compute/graph/Utils.h" #include "arm_compute/graph/nodes/Nodes.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" #include diff --git a/src/runtime/CL/CLHelpers.cpp b/src/runtime/CL/CLHelpers.cpp index adfdc3c917..5f1842f76d 100644 --- a/src/runtime/CL/CLHelpers.cpp +++ b/src/runtime/CL/CLHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/Error.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLRuntimeContext.h" namespace diff --git a/src/runtime/CL/CLMemory.cpp b/src/runtime/CL/CLMemory.cpp index efbc68f50e..a1743c56e6 100644 --- a/src/runtime/CL/CLMemory.cpp +++ b/src/runtime/CL/CLMemory.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #include "arm_compute/runtime/CL/CLMemory.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/CL/CLRuntimeContext.cpp b/src/runtime/CL/CLRuntimeContext.cpp index 2fc7f93adf..571e30931c 100644 --- a/src/runtime/CL/CLRuntimeContext.cpp +++ b/src/runtime/CL/CLRuntimeContext.cpp @@ -26,6 +26,8 @@ #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + namespace arm_compute { CLRuntimeContext::CLRuntimeContext() diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp index 90d77883f6..f37fc779fe 100644 --- a/src/runtime/CL/CLTensorAllocator.cpp +++ b/src/runtime/CL/CLTensorAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/runtime/CL/CLRuntimeContext.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + namespace arm_compute { const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer(); diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp index ad6e7ba97b..57c4f685f6 100644 --- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp +++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp @@ -24,13 +24,14 @@ #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/Utils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/Utils.h" namespace 
arm_compute { @@ -47,7 +48,7 @@ Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITen ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Invalid reduction operation"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast(TensorShape::num_max_dimensions), "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis); + const unsigned int num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis); DataType output_data_type = DataType::S32; TensorInfo not_reshaped_output; @@ -115,7 +116,7 @@ void CLArgMinMaxLayer::configure(const ICLTensor *input, int axis, ICLTensor *ou void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); + _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); _reduction_axis = axis; const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false); @@ -172,4 +173,4 @@ void CLArgMinMaxLayer::run() } _reshape.run(); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 4214813446..2eb310b893 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -36,6 +36,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include 
"src/core/helpers/AutoConfiguration.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp index 4c787673b5..b291ae5b88 100644 --- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp +++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp @@ -23,6 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" +#include "support/MemorySupport.h" + namespace arm_compute { void CLConvertFullyConnectedWeights::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 630352e4e6..85355f0f17 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + #include #include #include diff --git a/src/runtime/CL/functions/CLCropResize.cpp b/src/runtime/CL/functions/CLCropResize.cpp index 529f7bfb3e..6167e9de0a 100644 --- a/src/runtime/CL/functions/CLCropResize.cpp +++ b/src/runtime/CL/functions/CLCropResize.cpp @@ -25,6 +25,10 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + +#include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp index cd55336d9a..e6717b6d01 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp @@ -29,6 +29,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include 
"arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + #include #include #include diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp index c1055dda36..07e7a18941 100644 --- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp @@ -43,7 +43,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig } void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, - const PadStrideInfo &conv_info, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { // Set GPU target diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp index 3515c25d82..0ffafa0221 100644 --- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" #include #include diff --git a/src/runtime/CL/functions/CLFFT1D.cpp b/src/runtime/CL/functions/CLFFT1D.cpp index 7d15d33ab5..1269cba90d 100644 --- a/src/runtime/CL/functions/CLFFT1D.cpp +++ b/src/runtime/CL/functions/CLFFT1D.cpp @@ -25,8 +25,8 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/utils/helpers/fft.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp index 1def674bb6..4d0eab81ee 100644 --- a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp +++ 
b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp @@ -26,10 +26,13 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/utils/helpers/fft.h" + +#include "support/MemorySupport.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLFill.cpp b/src/runtime/CL/functions/CLFill.cpp index 6c0f1786f0..a89383ec31 100644 --- a/src/runtime/CL/functions/CLFill.cpp +++ b/src/runtime/CL/functions/CLFill.cpp @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/Types.h" +#include "support/MemorySupport.h" + #include namespace arm_compute diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 4f365b6a61..75e87c382b 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -25,10 +25,10 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/Cast.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp index d56b341abf..ccae6713a6 100644 --- a/src/runtime/CL/functions/CLGEMM.cpp +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -23,10 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLGEMM.h" -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" #include "arm_compute/core/CL/ICLTensor.h" -#include 
"arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/Helpers.h" @@ -35,12 +32,18 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/float_ops.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "src/core/CL/ICLGEMMKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/utils/helpers/float_ops.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" +#include "support/Cast.h" + +#include "support/MemorySupport.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index ee90b39c2b..e871b39805 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -27,10 +27,11 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "support/Cast.h" #include #include diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp 
b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index 30dce5b8fe..7a8de6c1f5 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -24,8 +24,6 @@ #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" @@ -35,7 +33,10 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp index 45dc402449..5291de074a 100644 --- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp +++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp index fce1fe43a2..4a60ee9d08 100644 --- a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp @@ -26,6 +26,8 @@ #include 
"arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" +#include "support/MemorySupport.h" + namespace arm_compute { CLInstanceNormalizationLayer::CLInstanceNormalizationLayer() diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp index e30b1dbb86..76a531b1c9 100644 --- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp +++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/CL/functions/CLPriorBoxLayer.cpp b/src/runtime/CL/functions/CLPriorBoxLayer.cpp index 1907c7cc08..fefbff639d 100644 --- a/src/runtime/CL/functions/CLPriorBoxLayer.cpp +++ b/src/runtime/CL/functions/CLPriorBoxLayer.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + using namespace arm_compute; CLPriorBoxLayer::CLPriorBoxLayer() diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp index 15a54c7928..c493471667 100644 --- a/src/runtime/CL/functions/CLQLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp index 0e2ede7167..4ea7f7642f 100644 --- a/src/runtime/CL/functions/CLReduceMean.cpp +++ b/src/runtime/CL/functions/CLReduceMean.cpp @@ -23,12 +23,13 @@ */ #include "arm_compute/runtime/CL/functions/CLReduceMean.h" -#include "arm_compute/core/CL/CLValidate.h" #include 
"arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp index 54e91fb8d8..208371c45d 100644 --- a/src/runtime/CL/functions/CLReductionOperation.cpp +++ b/src/runtime/CL/functions/CLReductionOperation.cpp @@ -30,7 +30,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/Utils.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/Utils.h" + #include "support/MemorySupport.h" namespace arm_compute @@ -47,7 +49,7 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis); + const unsigned int num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis); const bool is_serial = needs_serialized_reduction(op, input->data_type(), axis); const bool is_reshape_required = !keep_dims; @@ -194,7 +196,7 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign void CLReductionOperation::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), 
axis); + _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); _reduction_axis = axis; _is_serial = needs_serialized_reduction(op, input->info()->data_type(), axis); _is_reshape_required = !keep_dims; diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp index 60b72c5f87..1e3d614402 100644 --- a/src/runtime/CL/functions/CLRemap.cpp +++ b/src/runtime/CL/functions/CLRemap.cpp @@ -42,7 +42,7 @@ void CLRemap::configure(ICLTensor *input, const ICLTensor *map_x, const ICLTenso void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, - uint8_t constant_border_value) + uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); diff --git a/src/runtime/CL/functions/CLSelect.cpp b/src/runtime/CL/functions/CLSelect.cpp index c7d7df75d2..ef8010847b 100644 --- a/src/runtime/CL/functions/CLSelect.cpp +++ b/src/runtime/CL/functions/CLSelect.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + using namespace arm_compute; namespace arm_compute diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp index 720f9111a5..759c8706a1 100644 --- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/SoftmaxHelpers.h" namespace arm_compute { @@ -63,7 +64,7 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co { _memory_group.manage(&_input_permuted); 
_memory_group.manage(&_output_permuted); - _permute_input.configure(compile_context, input, &_input_permuted, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_input.configure(compile_context, input, &_input_permuted, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); tmp_output = &_output_permuted; } @@ -99,7 +100,7 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co _sum.allocator()->allocate(); if(_needs_permute) { - _permute_output.configure(compile_context, &_output_permuted, output, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_output.configure(compile_context, &_output_permuted, output, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); _input_permuted.allocator()->allocate(); _output_permuted.allocator()->allocate(); } @@ -117,7 +118,7 @@ Status CLSoftmaxLayerGeneric::validate(const ITensorInfo *input, const I const bool needs_permute = actual_axis != 0; if(needs_permute) { - const PermutationVector permutation_vector = get_permutation_vector_from_softmax_axis(actual_axis); + const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(*input, permutation_vector); TensorInfo input_permuted(input->clone()->set_tensor_shape(permuted_shape)); ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(input, &input_permuted, permutation_vector)); diff --git a/src/runtime/CL/functions/CLSplit.cpp b/src/runtime/CL/functions/CLSplit.cpp index db0b14b9a2..0b27371e3f 100644 --- a/src/runtime/CL/functions/CLSplit.cpp +++ b/src/runtime/CL/functions/CLSplit.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git 
a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp index 09a35a6f27..7ad017f918 100644 --- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp @@ -102,7 +102,7 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we } void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, - const PadStrideInfo &conv_info, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math) { // Get indices for the width and height diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelection.h b/src/runtime/CL/gemm/CLGEMMKernelSelection.h new file mode 100644 index 0000000000..f6fad7e4ff --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelection.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CLGEMMKERNELSELECTION_H +#define SRC_CLGEMMKERNELSELECTION_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" + +#include "support/MemorySupport.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** CLGEMMKernelSelection factory class */ +class CLGEMMKernelSelectionFactory final +{ +public: + /** Static method to select the GEMM kernel accordingly with the GPU target and GEMM's dimensionality + * + * @param[in] gpu GPU target + * + * @return CLGEMMKernelSelection class + */ + static std::unique_ptr create(GPUTarget gpu) + { + switch(get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + return support::cpp14::make_unique(gpu); + case GPUTarget::BIFROST: + return support::cpp14::make_unique(gpu); + case GPUTarget::VALHALL: + return support::cpp14::make_unique(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTION_H */ diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp index b1dd690ca5..73b90568f5 100644 --- a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h new file mode 100644 index 0000000000..a495b48301 --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CLGEMMKERNELSELECTIONBIFROST_H +#define SRC_CLGEMMKERNELSELECTIONBIFROST_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Bifrost based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionBifrost final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionBifrost(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams ¶ms) override; + +private: + CLGEMMKernelType g76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType g71_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTIONBIFROST_H */ diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp index 324c2f7dca..d172a827b5 100644 --- a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h new file mode 100644 index 0000000000..3f6003f7dc --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CLGEMMKERNELSELECTIONMIDGARD_H +#define SRC_CLGEMMKERNELSELECTIONMIDGARD_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Midgard based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionMidgard final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionMidgard(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams ¶ms) override; + +private: + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTIONMIDGARD_H */ diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp index c50c7ae76b..acae0e7565 100644 --- a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h new file mode 100644 index 0000000000..cbea9ea548 --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CLGEMMKERNELSELECTIONVALHALL_H +#define SRC_CLGEMMKERNELSELECTIONVALHALL_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Valhall based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionValhall final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionValhall(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams ¶ms) override; + +private: + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTIONVALHALL_H */ diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 52644bf192..a6474c9835 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -25,7 +25,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernels.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index e49e15508b..58b0d579d2 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -25,7 +25,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernels.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index f017006de7..e6b0ec20b8 100644 --- 
a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -27,7 +27,8 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" +#include "support/MemorySupport.h" #include "support/Mutex.h" #include diff --git a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp index 9d62733384..fdb4c9f0f6 100644 --- a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp +++ b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp index 3507a3ac45..31f1fafd69 100644 --- a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp +++ b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" #include #include diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp index 4d6caaee01..a7dd464540 100644 --- a/src/runtime/CPUUtils.cpp +++ b/src/runtime/CPUUtils.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" #include "arm_compute/core/CPP/CPPTypes.h" #include "arm_compute/core/Error.h" @@ -352,6 +352,10 @@ int get_max_cpus() namespace arm_compute { +namespace utils +{ +namespace cpu +{ void get_cpu_configuration(CPUInfo &cpuinfo) { #if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) @@ -460,5 +464,6 @@ unsigned int get_threads_hint() return num_threads_hint; } - +} // namespace cpu +} // namespace utils } // namespace arm_compute diff --git a/src/runtime/CPUUtils.h b/src/runtime/CPUUtils.h new file mode 100644 index 0000000000..452d3d58ca --- /dev/null +++ b/src/runtime/CPUUtils.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_RUNTIME_CPU_UTILS_H +#define ARM_COMPUTE_RUNTIME_CPU_UTILS_H + +namespace arm_compute +{ +class CPUInfo; + +namespace utils +{ +namespace cpu +{ +/** This function will try to detect the CPU configuration on the system and will fill + * the cpuinfo object accordingly to reflect this. + * + * @param[out] cpuinfo @ref CPUInfo to be used to hold the system's cpu configuration. + */ +void get_cpu_configuration(CPUInfo &cpuinfo); +/** Some systems have both big and small cores, this function computes the minimum number of cores + * that are exactly the same on the system. To maximize performance the library attempts to process + * workloads concurrently using as many threads as big cores are available on the system. + * + * @return The minimum number of common cores. + */ +unsigned int get_threads_hint(); +} // namespace cpu +} // namespace utils +} // namespace arm_compute +#endif /* ARM_COMPUTE_RUNTIME_CPU_UTILS_H */ diff --git a/src/runtime/DeviceProperties.cpp b/src/runtime/DeviceProperties.cpp index 5d7ae020d7..ec9f4a16ed 100644 --- a/src/runtime/DeviceProperties.cpp +++ b/src/runtime/DeviceProperties.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,12 +23,12 @@ */ #include "arm_compute/runtime/DeviceProperties.h" -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" namespace arm_compute { DeviceProperties::DeviceProperties() { - get_cpu_configuration(cpu_info); + utils::cpu::get_cpu_configuration(cpu_info); } } // namespace arm_compute diff --git a/src/runtime/GLES_COMPUTE/GCMemory.cpp b/src/runtime/GLES_COMPUTE/GCMemory.cpp index 998f8a5cc4..4d74555f4e 100644 --- a/src/runtime/GLES_COMPUTE/GCMemory.cpp +++ b/src/runtime/GLES_COMPUTE/GCMemory.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/GLES_COMPUTE/GCMemory.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/runtime/GLES_COMPUTE/GCMemoryRegion.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp index 9e23974b8d..807412eb17 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp @@ -29,6 +29,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" + #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/IScheduler.cpp b/src/runtime/IScheduler.cpp index 53df3699b0..43df3d5e23 100644 --- a/src/runtime/IScheduler.cpp +++ b/src/runtime/IScheduler.cpp @@ -26,17 +26,17 @@ #include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Window.h" -#include "arm_compute/runtime/CPUUtils.h" -#include "arm_compute/runtime/SchedulerUtils.h" +#include "src/runtime/CPUUtils.h" +#include "src/runtime/SchedulerUtils.h" namespace arm_compute { IScheduler::IScheduler() : _cpu_info() { - get_cpu_configuration(_cpu_info); + utils::cpu::get_cpu_configuration(_cpu_info); // Work out the best possible number of execution threads - _num_threads_hint = get_threads_hint(); + _num_threads_hint = utils::cpu::get_threads_hint(); } CPUInfo &IScheduler::cpu_info() @@ -74,7 +74,7 @@ void IScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, ITensor //in c++17 this can be swapped for auto [ m_threads, n_threads ] = split_2d(... 
unsigned m_threads, n_threads; - std::tie(m_threads, n_threads) = split_2d(this->num_threads(), m, n); + std::tie(m_threads, n_threads) = scheduler_utils::split_2d(this->num_threads(), m, n); std::vector workloads; for(unsigned int ni = 0; ni != n_threads; ++ni) diff --git a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp index 82316c49c6..f2181e0a74 100644 --- a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp +++ b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Utils.h" +#include "src/runtime/Utils.h" namespace arm_compute { @@ -36,6 +36,6 @@ INESimpleFunctionNoBorder::INESimpleFunctionNoBorder(IRuntimeContext *ctx) void INESimpleFunctionNoBorder::run() { - schedule_kernel_on_ctx(_ctx, _kernel.get(), Window::DimY); + utils::schedule_kernel_on_ctx(_ctx, _kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp index 0664d3c9d5..70bbba62ad 100644 --- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp +++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/MemorySupport.h" + namespace arm_compute { NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr memory_manager) diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp index 5a593e9c74..eab40ac5be 100644 --- a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + using namespace arm_compute; NEBatchNormalizationLayer::NEBatchNormalizationLayer() diff --git a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp index c06a8aa0e0..2705cffe68 100644 --- a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/MemorySupport.h" + namespace arm_compute { void NEBatchToSpaceLayer::configure(const ITensor *input, const ITensor *block_shape, ITensor *output) diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 8df4f4cb62..72bd9e6b19 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -35,6 +35,7 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp index f6ed2ec250..f8f99169aa 100644 --- a/src/runtime/NEON/functions/NECropResize.cpp +++ b/src/runtime/NEON/functions/NECropResize.cpp @@ -25,6 +25,8 @@ #include "arm_compute/runtime/NEON/functions/NECropResize.h" +#include "support/MemorySupport.h" + #include namespace arm_compute diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index dff3070239..cb9ab168a7 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp index e363f89482..0aaa37ec92 100644 --- a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp @@ -1,5 +1,5 
@@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/MemorySupport.h" + namespace arm_compute { void NEDepthToSpaceLayer::configure(const ITensor *input, ITensor *output, int32_t block_shape) diff --git a/src/runtime/NEON/functions/NEFFT1D.cpp b/src/runtime/NEON/functions/NEFFT1D.cpp index 744a91521f..2c53b185df 100644 --- a/src/runtime/NEON/functions/NEFFT1D.cpp +++ b/src/runtime/NEON/functions/NEFFT1D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,8 +25,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/utils/helpers/fft.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp index cd68788145..a46fc9f45f 100644 --- a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,11 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/utils/helpers/fft.h" + +#include "support/MemorySupport.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 4dcf41e360..d956d16f4d 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + #include #include diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 3b8ca44ed7..4166cff97a 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -34,6 +33,8 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index ad349cb635..5b0848398d 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -23,13 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include 
"src/core/NEON/kernels/assembly/arm_gemm.hpp" - -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h" - +#include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" #include "src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h" +#include "src/core/NEON/kernels/assembly/arm_gemm.hpp" + +#include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 83db146a8a..36357dde41 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -33,6 +33,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/helpers/AutoConfiguration.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 3d5377892a..13210a06cd 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp index 11989d3225..7610d15787 100644 --- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp +++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" #include #include diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp index 21c349ba95..03c597a3bf 100644 --- a/src/runtime/NEON/functions/NEPadLayer.cpp +++ b/src/runtime/NEON/functions/NEPadLayer.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp index fda130bf69..bcf6bef9c7 100644 --- a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp +++ b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + namespace arm_compute { void NEPriorBoxLayer::configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info) diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp index 5a6b51337a..95f20ae1a9 100644 --- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp +++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp index 021f7b530a..c3c5529c09 100644 --- a/src/runtime/NEON/functions/NEReduceMean.cpp +++ b/src/runtime/NEON/functions/NEReduceMean.cpp @@ -23,11 +23,12 @@ */ #include "arm_compute/runtime/NEON/functions/NEReduceMean.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index 91176bfa45..4938a56b3f 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" namespace 
arm_compute { diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index 2278f07a1c..bbf8343c2b 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -30,12 +30,14 @@ #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Rounding.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/utils/ScaleUtils.h" +#include "support/MemorySupport.h" +#include "support/Rounding.h" + #include #include #include diff --git a/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp b/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp index b0cafae520..d165b2235c 100644 --- a/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp +++ b/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h" +#include "src/runtime/NEON/functions/NESimpleAssemblyFunction.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" diff --git a/src/runtime/NEON/functions/NESimpleAssemblyFunction.h b/src/runtime/NEON/functions/NESimpleAssemblyFunction.h new file mode 100644 index 0000000000..e9be54d35f --- /dev/null +++ b/src/runtime/NEON/functions/NESimpleAssemblyFunction.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H +#define ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H + +#include "arm_compute/runtime/IFunction.h" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" + +#include + +namespace arm_compute +{ +/** Basic interface for functions which have a single NEON GEMM wrapper kernel to run */ +class NESimpleAssemblyFunction : public IFunction +{ +public: + /** Constructor */ + NESimpleAssemblyFunction(); + + /** Configure the function with the kernel to run + * + * @param[in] kernel GEMM Wrapper kernel configured and ready to run + * + * @note The kernel is expected to have a 1D window. The function will multi-thread this window across the X dimension. 
+ */ + void configure(std::unique_ptr kernel); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr _kernel; /**< Kernel to run */ +}; +} //namespace arm_compute +#endif /*ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H */ diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index e763caa3a3..4f773861d2 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/SoftmaxHelpers.h" namespace arm_compute { @@ -53,7 +54,7 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f // Add to the memory manager _input_permuted _memory_group.manage(&_input_permuted); - _permute_input.configure(input, &_input_permuted, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_input.configure(input, &_input_permuted, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); } // We want to deal with a 2D input. 
Either it is the permuted version of the original input (4D case) @@ -87,7 +88,7 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f _input_permuted.allocator()->allocate(); // Re-permute the permuted output into the requested (4D) output - _permute_output.configure(&_output_permuted, output, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_output.configure(&_output_permuted, output, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); // Allocate the intermediate permuted tensors _output_permuted.allocator()->allocate(); @@ -128,7 +129,7 @@ Status NESoftmaxLayerGeneric::validate(const ITensorInfo *input, const I if(needs_permute) { - const PermutationVector permutation_vector = get_permutation_vector_from_softmax_axis(actual_axis); + const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(*input, permutation_vector); TensorInfo input_permuted(input->clone()->set_tensor_shape(permuted_shape)); ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &input_permuted, permutation_vector)); diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 1bad310640..23b9f60c38 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -23,17 +23,17 @@ */ #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include "src/core/CPP/Validate.h" #include 
"src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" #include "support/MemorySupport.h" -#include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp" +#include "src/core/NEON/kernels/convolution/common/utils.hpp" #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" namespace arm_compute diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index 73a7caac8b..11e89cb23b 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -24,18 +24,21 @@ #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h" -#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp" -#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h" +#include "src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp" +#include "src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp" +#include "src/core/helpers/AutoConfiguration.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + #include namespace arm_compute diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index 4c2f03a53a..bf34b0114b 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ 
b/src/runtime/OMP/OMPScheduler.cpp @@ -27,7 +27,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" #include namespace arm_compute diff --git a/src/runtime/SchedulerUtils.cpp b/src/runtime/SchedulerUtils.cpp index 1c12e3ce58..6f9a32c879 100644 --- a/src/runtime/SchedulerUtils.cpp +++ b/src/runtime/SchedulerUtils.cpp @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "src/runtime/SchedulerUtils.h" #include "arm_compute/core/Error.h" @@ -28,6 +29,8 @@ namespace arm_compute { +namespace scheduler_utils +{ #ifndef BARE_METAL std::pair split_2d(unsigned max_threads, std::size_t m, std::size_t n) { @@ -76,4 +79,5 @@ std::pair split_2d(unsigned max_threads, std::size_t m, std: } } #endif /* #ifndef BARE_METAL */ +} // namespace scheduler_utils } // namespace arm_compute diff --git a/src/runtime/SchedulerUtils.h b/src/runtime/SchedulerUtils.h new file mode 100644 index 0000000000..46644a369e --- /dev/null +++ b/src/runtime/SchedulerUtils.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_COMPUTE_SCHEDULER_UTILS_H +#define SRC_COMPUTE_SCHEDULER_UTILS_H + +#include +#include + +namespace arm_compute +{ +namespace scheduler_utils +{ +/** Given two dimensions and a maximum number of threads to utilise, calculate the best + * combination of threads that fit in (multiplied together) max_threads. + * + * This algorithm assumes that work in either of the dimensions is equally difficult + * to compute + * + * @returns [m_nthreads, n_nthreads] A pair of the threads that should be used in each dimension + */ +std::pair split_2d(unsigned max_threads, std::size_t m, std::size_t n); +} // namespace scheduler_utils +} // namespace arm_compute +#endif /* SRC_COMPUTE_SCHEDULER_UTILS_H */ diff --git a/src/runtime/Utils.cpp b/src/runtime/Utils.cpp index 534b421f8a..15e9d43a49 100644 --- a/src/runtime/Utils.cpp +++ b/src/runtime/Utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/runtime/Utils.h" +#include "src/runtime/Utils.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -31,6 +31,8 @@ namespace arm_compute { +namespace utils +{ #ifndef DOXYGEN_SKIP_THIS static const std::string information = #include "arm_compute_version.embed" @@ -78,4 +80,5 @@ unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, un const unsigned int num_of_stages = num_of_wg / 128 + 2; return num_of_stages; } +} // namespace utils } // namespace arm_compute diff --git a/src/runtime/Utils.h b/src/runtime/Utils.h new file mode 100644 index 0000000000..f8775c9612 --- /dev/null +++ b/src/runtime/Utils.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_RUNTIME_UTILS_H +#define SRC_RUNTIME_UTILS_H + +#include "arm_compute/runtime/IRuntimeContext.h" +#include "arm_compute/runtime/Scheduler.h" + +#include <string> + +namespace arm_compute +{ +namespace utils +{ +/** Convert a Scheduler::Type into a string. + * + * @param[in] t @ref Scheduler::Type to be translated to string. + * + * @return The string describing the scheduler type. + */ +const std::string &string_from_scheduler_type(Scheduler::Type t); + +/** Schedules a kernel using the context if not nullptr else uses the legacy scheduling flow. + * + * @param[in] ctx Context to use. + * @param[in] kernel Kernel to schedule. + * @param[in] hints Hints to use. + */ +void schedule_kernel_on_ctx(IRuntimeContext *ctx, ICPPKernel *kernel, const IScheduler::Hints &hints); + +/** Calculate number of stages for parallel implementations + * + * @param[in] input_x_dimension input tensor x dimension + * @param[in] axis axis to be used + */ +unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, unsigned int axis); +} // namespace utils +} // namespace arm_compute +#endif /* SRC_RUNTIME_UTILS_H */ -- cgit v1.2.1