aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2020-10-19 16:00:11 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-10-20 10:27:40 +0000
commit68dd25fbe6e4d3c3513fa5993863419769aa08fc (patch)
treeb918be923f9e4550c306d7f44d168ab938a71fc8 /src
parentf0a4e609d98f111b6a7d4a2b578d1b7cba64b805 (diff)
downloadComputeLibrary-68dd25fbe6e4d3c3513fa5993863419769aa08fc.tar.gz
COMPMID-3637: Move utility headers from arm_compute to src
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: If9d6fa8c900b68c4b6fd373f2fc1f9abb83ea917 Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4145 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/AccessWindowAutoPadding.cpp4
-rw-r--r--src/core/AccessWindowAutoPadding.h85
-rw-r--r--src/core/AccessWindowStatic.cpp4
-rw-r--r--src/core/AccessWindowStatic.h101
-rw-r--r--src/core/AccessWindowTranspose.cpp4
-rw-r--r--src/core/AccessWindowTranspose.h48
-rw-r--r--src/core/CL/CLValidate.h61
-rw-r--r--src/core/CL/ICLGEMMKernelConfiguration.h68
-rw-r--r--src/core/CL/ICLKernel.cpp9
-rw-r--r--src/core/CL/ICLSimple2DKernel.cpp7
-rw-r--r--src/core/CL/ICLSimpleKernel.cpp3
-rw-r--r--src/core/CL/gemm/CLGEMMHelpers.cpp2
-rw-r--r--src/core/CL/gemm/CLGEMMHelpers.h73
-rw-r--r--src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h65
-rw-r--r--src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp4
-rw-r--r--src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h56
-rw-r--r--src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp4
-rw-r--r--src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h51
-rw-r--r--src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp4
-rw-r--r--src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h53
-rw-r--r--src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h63
-rw-r--r--src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp30
-rw-r--r--src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h56
-rw-r--r--src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp4
-rw-r--r--src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h53
-rw-r--r--src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h63
-rw-r--r--src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp12
-rw-r--r--src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h59
-rw-r--r--src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp4
-rw-r--r--src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h53
-rw-r--r--src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp10
-rw-r--r--src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLBitwiseAndKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLBitwiseOrKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLBitwiseXorKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLBox3x3Kernel.cpp3
-rw-r--r--src/core/CL/kernels/CLCannyEdgeKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLChannelCombineKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLChannelExtractKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLCol2ImKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLColorConvertKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLComparisonKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLConvolutionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLCopyKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLCropKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLDepthConvertLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLDerivativeKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDilateKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLElementwiseOperationKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLErodeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFFTDigitReverseKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLFFTRadixStageKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLFFTScaleKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLFastCornersKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLFillBorderKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLFlattenLayerKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLFloorKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp14
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp13
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLGatherKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLGaussian3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGaussianPyramidKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLHOGDescriptorKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLHOGDetectorKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLHarrisCornersKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLHistogramKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLIntegralImageKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLLKTrackerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLMagnitudePhaseKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLMeanStdDevKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLMedian3x3Kernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMemsetKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMinMaxLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLMinMaxLocationKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLNonLinearFilterKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp1
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLPadLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPermuteKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLPriorBoxLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLQuantizationLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLROIAlignLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLROIPoolingLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLRangeKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLRemapKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLReorgLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLReshapeLayerKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLReverseKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLScaleKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLScharr3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLSelectKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLSobel3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLSobel5x5Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLSobel7x7Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLSoftmaxLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLStackLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLStridedSliceKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLTileKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLTransposeKernel.cpp10
-rw-r--r--src/core/CL/kernels/CLUpsampleLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLWarpAffineKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLWarpPerspectiveKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLWeightsReshapeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp10
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLWinogradInputTransformKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLYOLOLayerKernel.cpp8
-rw-r--r--src/core/CPP/ICPPSimpleKernel.cpp5
-rw-r--r--src/core/CPP/Validate.h117
-rw-r--r--src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp4
-rw-r--r--src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp12
-rw-r--r--src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp3
-rw-r--r--src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp4
-rw-r--r--src/core/CPP/kernels/CPPPermuteKernel.cpp7
-rw-r--r--src/core/CPP/kernels/CPPTopKVKernel.cpp10
-rw-r--r--src/core/CPP/kernels/CPPUpsampleKernel.cpp7
-rw-r--r--src/core/GLES_COMPUTE/IGCSimpleKernel.cpp3
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp1
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp6
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp22
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp6
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp12
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp2
-rw-r--r--src/core/Helpers.cpp171
-rw-r--r--src/core/NEON/NETracePoint.cpp4
-rw-r--r--src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEAccumulateKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEBitwiseAndKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEBitwiseNotKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEBitwiseOrKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEBitwiseXorKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEBox3x3Kernel.cpp5
-rw-r--r--src/core/NEON/kernels/NECannyEdgeKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NEChannelCombineKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEChannelExtractKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NECol2ImKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEColorConvertKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEConvolutionKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NECopyKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NECropKernel.cpp9
-rw-r--r--src/core/NEON/kernels/NECumulativeDistributionKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp7
-rw-r--r--src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp7
-rw-r--r--src/core/NEON/kernels/NEDequantizationLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEDerivativeKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEDilateKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEElementwiseOperationKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEErodeKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEFFTRadixStageKernel.cpp7
-rw-r--r--src/core/NEON/kernels/NEFFTScaleKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEFastCornersKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEFillArrayKernel.cpp1
-rw-r--r--src/core/NEON/kernels/NEFillBorderKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NEFlattenLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEFloorKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp7
-rw-r--r--src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGatherKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEGaussian3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGaussian5x5Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGaussianPyramidKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NEHOGDescriptorKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEHOGDetectorKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEHarrisCornersKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEHistogramKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEIntegralImageKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NELKTrackerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEMeanStdDevKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEMedian3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEMemsetKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEMinMaxLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEMinMaxLocationKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NENonLinearFilterKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NENormalizationLayerKernel.cpp7
-rw-r--r--src/core/NEON/kernels/NEPadLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEPermuteKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEQuantizationLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEROIAlignLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NERangeKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEReductionOperationKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NERemapKernel.cpp9
-rw-r--r--src/core/NEON/kernels/NEReorgLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEReshapeLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEReverseKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp12
-rw-r--r--src/core/NEON/kernels/NEScharr3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NESelectKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NESobel3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NESobel5x5Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NESobel7x7Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NESoftmaxLayerKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NEStackLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEStridedSliceKernel.cpp9
-rw-r--r--src/core/NEON/kernels/NEThresholdKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NETileKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NETransposeKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEUpsampleLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEWarpKernel.cpp19
-rw-r--r--src/core/NEON/kernels/NEWeightsReshapeKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h4
-rw-r--r--src/core/NEON/kernels/NEYOLOLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp4
-rw-r--r--src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h108
-rw-r--r--src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h88
-rw-r--r--src/core/NEON/kernels/assembly/arm_gemm_local.hpp34
-rw-r--r--src/core/NEON/kernels/convolution/common/activation.hpp37
-rw-r--r--src/core/NEON/kernels/convolution/common/alloc.hpp31
-rw-r--r--src/core/NEON/kernels/convolution/common/arm.hpp39
-rw-r--r--src/core/NEON/kernels/convolution/common/convolution.hpp29
-rw-r--r--src/core/NEON/kernels/convolution/common/padding.hpp91
-rw-r--r--src/core/NEON/kernels/convolution/common/perf.h32
-rw-r--r--src/core/NEON/kernels/convolution/common/qasymm8.hpp54
-rw-r--r--src/core/NEON/kernels/convolution/common/qsymm8.hpp76
-rw-r--r--src/core/NEON/kernels/convolution/common/shims.hpp749
-rw-r--r--src/core/NEON/kernels/convolution/common/tensor.hpp178
-rw-r--r--src/core/NEON/kernels/convolution/common/tensor_utils.hpp46
-rw-r--r--src/core/NEON/kernels/convolution/common/utils.hpp60
-rw-r--r--src/core/NEON/kernels/convolution/depthwise/depthwise.hpp551
-rw-r--r--src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp156
-rw-r--r--src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp291
-rw-r--r--src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp88
-rw-r--r--src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h4
-rw-r--r--src/core/TensorInfo.cpp1
-rw-r--r--src/core/helpers/AutoConfiguration.h176
-rw-r--r--src/core/helpers/NormalizationHelpers.h47
-rw-r--r--src/core/helpers/ScaleHelpers.h331
-rw-r--r--src/core/helpers/SoftmaxHelpers.cpp45
-rw-r--r--src/core/helpers/SoftmaxHelpers.h50
-rw-r--r--src/core/helpers/Utils.h97
-rw-r--r--src/core/helpers/WindowHelpers.cpp183
-rw-r--r--src/core/helpers/WindowHelpers.h172
-rw-r--r--src/core/utils/helpers/bit_ops.h52
-rw-r--r--src/core/utils/helpers/fft.cpp4
-rw-r--r--src/core/utils/helpers/fft.h55
-rw-r--r--src/core/utils/helpers/float_ops.h116
-rw-r--r--src/core/utils/helpers/tensor_info.h57
-rw-r--r--src/core/utils/helpers/tensor_transform.cpp4
-rw-r--r--src/graph/algorithms/TopologicalSort.cpp6
-rw-r--r--src/graph/backends/CL/CLFunctionsFactory.cpp2
-rw-r--r--src/graph/backends/CL/CLNodeValidator.cpp2
-rw-r--r--src/graph/backends/CL/CLSubTensorHandle.cpp4
-rw-r--r--src/graph/backends/GLES/GCFunctionsFactory.cpp2
-rw-r--r--src/graph/backends/GLES/GCNodeValidator.cpp4
-rw-r--r--src/graph/backends/NEON/NEFunctionFactory.cpp2
-rw-r--r--src/graph/backends/NEON/NENodeValidator.cpp2
-rw-r--r--src/graph/backends/NEON/NETensorHandle.cpp4
-rw-r--r--src/graph/detail/CrossLayerMemoryManagerHelpers.cpp2
-rw-r--r--src/graph/mutators/DepthConcatSubTensorMutator.cpp6
-rw-r--r--src/graph/mutators/GroupedConvolutionMutator.cpp2
-rw-r--r--src/graph/mutators/NodeExecutionMethodMutator.cpp4
-rw-r--r--src/graph/mutators/NodeFusionMutator.cpp13
-rw-r--r--src/graph/mutators/SplitLayerSubTensorMutator.cpp4
-rw-r--r--src/graph/mutators/SyntheticDataTypeMutator.cpp2
-rw-r--r--src/runtime/CL/CLHelpers.cpp3
-rw-r--r--src/runtime/CL/CLMemory.cpp4
-rw-r--r--src/runtime/CL/CLRuntimeContext.cpp2
-rw-r--r--src/runtime/CL/CLTensorAllocator.cpp4
-rw-r--r--src/runtime/CL/functions/CLArgMinMaxLayer.cpp11
-rw-r--r--src/runtime/CL/functions/CLConcatenateLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp2
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLCropResize.cpp4
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLDirectConvolutionLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLFFT1D.cpp2
-rw-r--r--src/runtime/CL/functions/CLFFTConvolutionLayer.cpp5
-rw-r--r--src/runtime/CL/functions/CLFill.cpp2
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLGEMM.cpp15
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp3
-rw-r--r--src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp7
-rw-r--r--src/runtime/CL/functions/CLGenerateProposalsLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLLSTMLayerQuantized.cpp1
-rw-r--r--src/runtime/CL/functions/CLPriorBoxLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLQLSTMLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLReduceMean.cpp3
-rw-r--r--src/runtime/CL/functions/CLReductionOperation.cpp8
-rw-r--r--src/runtime/CL/functions/CLRemap.cpp2
-rw-r--r--src/runtime/CL/functions/CLSelect.cpp2
-rw-r--r--src/runtime/CL/functions/CLSoftmaxLayer.cpp7
-rw-r--r--src/runtime/CL/functions/CLSplit.cpp1
-rw-r--r--src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp2
-rw-r--r--src/runtime/CL/gemm/CLGEMMKernelSelection.h65
-rw-r--r--src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp4
-rw-r--r--src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h55
-rw-r--r--src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp4
-rw-r--r--src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h53
-rw-r--r--src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp4
-rw-r--r--src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h53
-rw-r--r--src/runtime/CL/tuners/BifrostTuner.cpp2
-rw-r--r--src/runtime/CL/tuners/MidgardTuner.cpp2
-rw-r--r--src/runtime/CPP/CPPScheduler.cpp3
-rw-r--r--src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp1
-rw-r--r--src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp1
-rw-r--r--src/runtime/CPUUtils.cpp9
-rw-r--r--src/runtime/CPUUtils.h51
-rw-r--r--src/runtime/DeviceProperties.cpp6
-rw-r--r--src/runtime/GLES_COMPUTE/GCMemory.cpp4
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp2
-rw-r--r--src/runtime/IScheduler.cpp10
-rw-r--r--src/runtime/NEON/INESimpleFunctionNoBorder.cpp6
-rw-r--r--src/runtime/NEON/functions/NEArgMinMaxLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/NEConcatenateLayer.cpp1
-rw-r--r--src/runtime/NEON/functions/NECropResize.cpp2
-rw-r--r--src/runtime/NEON/functions/NEDeconvolutionLayer.cpp1
-rw-r--r--src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/NEFFT1D.cpp4
-rw-r--r--src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp7
-rw-r--r--src/runtime/NEON/functions/NEFullyConnectedLayer.cpp2
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp3
-rw-r--r--src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp10
-rw-r--r--src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp1
-rw-r--r--src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp1
-rw-r--r--src/runtime/NEON/functions/NELSTMLayerQuantized.cpp3
-rw-r--r--src/runtime/NEON/functions/NEPadLayer.cpp1
-rw-r--r--src/runtime/NEON/functions/NEPriorBoxLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/NEQLSTMLayer.cpp1
-rw-r--r--src/runtime/NEON/functions/NEReduceMean.cpp3
-rw-r--r--src/runtime/NEON/functions/NEReductionOperation.cpp1
-rw-r--r--src/runtime/NEON/functions/NEScale.cpp4
-rw-r--r--src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp4
-rw-r--r--src/runtime/NEON/functions/NESimpleAssemblyFunction.h56
-rw-r--r--src/runtime/NEON/functions/NESoftmaxLayer.cpp7
-rw-r--r--src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp11
-rw-r--r--src/runtime/OMP/OMPScheduler.cpp2
-rw-r--r--src/runtime/SchedulerUtils.cpp4
-rw-r--r--src/runtime/SchedulerUtils.h45
-rw-r--r--src/runtime/Utils.cpp7
-rw-r--r--src/runtime/Utils.h60
450 files changed, 6798 insertions, 910 deletions
diff --git a/src/core/AccessWindowAutoPadding.cpp b/src/core/AccessWindowAutoPadding.cpp
index 85c5b27d82..ca2f7d238f 100644
--- a/src/core/AccessWindowAutoPadding.cpp
+++ b/src/core/AccessWindowAutoPadding.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/AccessWindowAutoPadding.h"
+#include "src/core/AccessWindowAutoPadding.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Window.h"
diff --git a/src/core/AccessWindowAutoPadding.h b/src/core/AccessWindowAutoPadding.h
new file mode 100644
index 0000000000..b8d1508679
--- /dev/null
+++ b/src/core/AccessWindowAutoPadding.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H
+#define ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class Window;
+class ITensorInfo;
+
+/** Dummy access window.
+ *
+ * This implementation always uses the auto padding of the tensor info and
+ * never updates the window. The valid region is always set to cover the entire
+ * tensor.
+ *
+ * @note This access window is only used during the migration to the new
+ * padding system. It will be removed once all kernels have been ported.
+ *
+ * */
+class AccessWindowAutoPadding : public IAccessWindow
+{
+public:
+ /** Default constructor.
+ *
+ * @param[in,out] info Tensor info of the accessed kernel.
+ */
+ AccessWindowAutoPadding(ITensorInfo *info);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete;
+ /** Allow instances of this class to be move constructed */
+ AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default;
+ /** Allow instances of this class to be moved */
+ AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;
+ /** Default destructor */
+ ~AccessWindowAutoPadding() = default;
+
+ /** Set the valid region to match the entire tensor. */
+ void set_valid_region();
+
+ /** Return a valid region that spans across the entire tensor.
+ *
+ * @return a valid region.
+ *
+ */
+ ValidRegion compute_valid_region() const;
+
+ // Inherited methods overridden:
+ bool update_window_if_needed(Window &window) const override;
+ bool update_padding_if_needed(const Window &window) override;
+ ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+
+private:
+ ITensorInfo *_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H*/
diff --git a/src/core/AccessWindowStatic.cpp b/src/core/AccessWindowStatic.cpp
index 10e88b8632..0607011bc5 100644
--- a/src/core/AccessWindowStatic.cpp
+++ b/src/core/AccessWindowStatic.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/AccessWindowStatic.h"
+#include "src/core/AccessWindowStatic.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorInfo.h"
diff --git a/src/core/AccessWindowStatic.h b/src/core/AccessWindowStatic.h
new file mode 100644
index 0000000000..f7d43cbb55
--- /dev/null
+++ b/src/core/AccessWindowStatic.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_IACCESS_WINDOW_STATIC_H
+#define ARM_COMPUTE_IACCESS_WINDOW_STATIC_H
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class Window;
+class ITensorInfo;
+
+/** Implementation of a static rectangular access pattern.
+ *
+ * In this implementation the access offsets and sizes are not relative to the
+ * current element. Instead they are considered to be absolute coordinates
+ * within the accessed tensor's shape.
+ *
+ * */
+class AccessWindowStatic : public IAccessWindow
+{
+public:
+ /** Constructor for a static access pattern.
+ *
+ * @param[in,out] info Tensor info of the accessed kernel.
+ * @param[in] start_x Start of the access in X direction.
+ * @param[in] start_y Start of the access in Y direction.
+ * @param[in] end_x End of the access in X direction.
+ * @param[in] end_y End of the access in Y direction.
+ */
+ AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y);
+
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ AccessWindowStatic(const AccessWindowStatic &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ AccessWindowStatic &operator=(const AccessWindowStatic &) = delete;
+ /** Allow instances of this class to be move constructed */
+ AccessWindowStatic(AccessWindowStatic &&) = default;
+ /** Allow instances of this class to be moved */
+ AccessWindowStatic &operator=(AccessWindowStatic &&) = default;
+ /** Default destructor */
+ ~AccessWindowStatic() = default;
+
+ /** Set the valid region based on the static access pattern and valid
+ * region of the inputs.
+ *
+ * @param[in] window Execution window of the kernel.
+ * @param[in] input_valid_region Combined valid region of all inputs.
+ */
+ void set_valid_region(const Window &window, const ValidRegion &input_valid_region);
+
+ /** Compute the valid region based on the static access pattern and valid region of the inputs.
+ *
+ * @param[in] window Execution window of the kernel.
+ * @param[in] input_valid_region Combined valid region of all inputs.
+ *
+ * @return a valid region.
+ *
+ */
+ ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const;
+
+ // Inherited methods overriden:
+ bool update_window_if_needed(Window &window) const override;
+ bool update_padding_if_needed(const Window &window) override;
+ ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+
+private:
+ ITensorInfo *_info;
+ int _start_x;
+ int _start_y;
+ int _end_x;
+ int _end_y;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_IACCESS_WINDOW_STATIC_H*/
diff --git a/src/core/AccessWindowTranspose.cpp b/src/core/AccessWindowTranspose.cpp
index 4c03ca16c7..d8bd4c4de1 100644
--- a/src/core/AccessWindowTranspose.cpp
+++ b/src/core/AccessWindowTranspose.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/AccessWindowTranspose.h"
+#include "src/core/AccessWindowTranspose.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorInfo.h"
diff --git a/src/core/AccessWindowTranspose.h b/src/core/AccessWindowTranspose.h
new file mode 100644
index 0000000000..0306076d6e
--- /dev/null
+++ b/src/core/AccessWindowTranspose.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H
+#define ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class Window;
+class ITensorInfo;
+
+/** Implementation of a XY-transpose access pattern. */
+class AccessWindowTranspose : public AccessWindowRectangle
+{
+public:
+ using AccessWindowRectangle::AccessWindowRectangle;
+ bool update_window_if_needed(Window &window) const override;
+ bool update_padding_if_needed(const Window &window) override;
+ using AccessWindowRectangle::compute_valid_region;
+ ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H*/
diff --git a/src/core/CL/CLValidate.h b/src/core/CL/CLValidate.h
new file mode 100644
index 0000000000..cbbdf2d9d2
--- /dev/null
+++ b/src/core/CL/CLValidate.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_VALIDATE_H
+#define ARM_COMPUTE_CL_VALIDATE_H
+
+#include "arm_compute/core/Validate.h"
+
+namespace arm_compute
+{
+#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \
+ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
+
+#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
+
+/** Return an error if int64_base_atomics extension is not supported by the device.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ *
+ * @return Status
+ */
+inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line)
+{
+ if(!CLKernelLibrary::get().int64_base_atomics_supported())
+ {
+ return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported");
+ }
+ return arm_compute::Status{};
+}
+
+#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
+ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
+
+#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
+
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_VALIDATE_H */
diff --git a/src/core/CL/ICLGEMMKernelConfiguration.h b/src/core/CL/ICLGEMMKernelConfiguration.h
new file mode 100644
index 0000000000..ac0e7ab7ff
--- /dev/null
+++ b/src/core/CL/ICLGEMMKernelConfiguration.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H
+#define ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H
+
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+/** Basic interface for the GEMM kernel configuration */
+class ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ ICLGEMMKernelConfiguration(GPUTarget arch)
+ : _target(arch)
+ {
+ }
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete;
+ /** Default Move Constructor. */
+ ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default;
+ /** Default move assignment operator */
+ ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default;
+ /** Virtual destructor */
+ virtual ~ICLGEMMKernelConfiguration() = default;
+ /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used
+ *
+ * @param[in] m Number of rows LHS matrix
+ * @param[in] n Number of columns RHS matrix
+ * @param[in] k Number of columns LHS matrix or number of rows RHS matrix
+ * @param[in] b Batch size
+ * @param[in] data_type Data type
+ */
+ virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
+
+protected:
+ GPUTarget _target;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H */
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp
index be633746a2..f91510b4a7 100644
--- a/src/core/CL/ICLKernel.cpp
+++ b/src/core/CL/ICLKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,14 +23,9 @@
*/
#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/Utils.h"
#include <cstddef>
diff --git a/src/core/CL/ICLSimple2DKernel.cpp b/src/core/CL/ICLSimple2DKernel.cpp
index ce95495fff..dfef5822b2 100644
--- a/src/core/CL/ICLSimple2DKernel.cpp
+++ b/src/core/CL/ICLSimple2DKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,7 @@
*/
#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CL/ICLSimpleKernel.cpp b/src/core/CL/ICLSimpleKernel.cpp
index d2f09a3478..90b5be8069 100644
--- a/src/core/CL/ICLSimpleKernel.cpp
+++ b/src/core/CL/ICLSimpleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CL/gemm/CLGEMMHelpers.cpp b/src/core/CL/gemm/CLGEMMHelpers.cpp
index 0a4a4adc31..877bf1e047 100644
--- a/src/core/CL/gemm/CLGEMMHelpers.cpp
+++ b/src/core/CL/gemm/CLGEMMHelpers.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/core/CL/gemm/CLGEMMHelpers.h b/src/core/CL/gemm/CLGEMMHelpers.h
new file mode 100644
index 0000000000..013c068cf7
--- /dev/null
+++ b/src/core/CL/gemm/CLGEMMHelpers.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMHELPERS_H
+#define ARM_COMPUTE_CLGEMMHELPERS_H
+
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensorInfo;
+struct GEMMRHSMatrixInfo;
+
+namespace cl_gemm
+{
+/** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
+ *
+ * @param[in] m Number of rows (M) in the LHS matrix not reshaped
+ * @param[in] n Number of columns (N) in the RHS matrix not reshaped
+ * @param[in] m0 Number of rows processed by each thread/work-item
+ * @param[in] n0 Number of columns processed by each thread/work-item
+ * @param[in] k0 Number of inner accumulation performed by each thread/work-item
+ * @param[in] v0 Number of vertical blocks of size (m0xk0) stored on the same output row
+ * @param[in] h0 Number of horizontal blocks of size (k0xn0) stored on the same output row
+ * @param[in] lhs_interleave True if the v0 (m0xk0) blocks have to be interleaved in the output row
+ * @param[in] rhs_interleave True if the h0 (k0xn0) blocks have to be interleaved in the output row
+ * @param[in] lhs_transpose True if the (m0xk0) block has to be transposed before been stored
+ * @param[in] rhs_transpose True if the (k0xn0) block has to be transposed before been stored
+ * @param[in] export_to_cl_image (Optional) True if the RHS reshaped matrix has to be exported to cl_image
+ *
+ * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
+ */
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
+ bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image = false);
+
+/** Update padding required to export the OpenCL buffer to OpenCL image2d
+ *
+ * @param[in,out] tensor ITensorInfo of the tensor required to be exported to OpenCL image2d
+ */
+void update_padding_for_cl_image(ITensorInfo *tensor);
+
+/** Utility function to validate the image2d OpenCL object support on the RHS reshaped matrix
+ *
+ * @param[in] tensor_reshaped_info TensorInfo for the RHS reshaped matrix
+ * @param[in] rhs_info @ref GEMMRHSMatrixInfo
+ *
+ * @return Status reporting if we can use the image2d OpenCL object on the RHS reshaped matrix
+ */
+Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info);
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMHELPERS_H */
diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
new file mode 100644
index 0000000000..aecf5a8aa8
--- /dev/null
+++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H
+#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"
+#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h"
+#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h"
+
+#include "support/MemorySupport.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** CLGEMMNative factory class */
+class CLGEMMNativeKernelConfigurationFactory final
+{
+public:
+ /** Static method to construct CLGEMMNative kernel object accordingly with the GPU target
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return CLGEMMNative kernel configuration class
+ */
+ static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
+ {
+ switch(get_arch_from_target(gpu))
+ {
+ case GPUTarget::MIDGARD:
+ return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationMidgard>(gpu);
+ case GPUTarget::BIFROST:
+ return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(gpu);
+ case GPUTarget::VALHALL:
+ return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationValhall>(gpu);
+ default:
+ ARM_COMPUTE_ERROR("Not supported GPU target");
+ }
+ }
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H */
diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp
index 51b7fc7190..4cc3d6ae74 100644
--- a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"
+#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
new file mode 100644
index 0000000000..1e7432c89a
--- /dev/null
+++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H
+#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Bifrost based OpenCL GEMMNative configuration */
+class CLGEMMNativeKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMNativeKernelConfigurationBifrost(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H */
diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp
index 3e7c17664a..fd699a08f7 100644
--- a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp
+++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h"
+#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
new file mode 100644
index 0000000000..2f6671706e
--- /dev/null
+++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
+#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Midgard based OpenCL GEMMNative configuration */
+class CLGEMMNativeKernelConfigurationMidgard final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMNativeKernelConfigurationMidgard(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H */
diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp
index efc82fb78c..2c82340eef 100644
--- a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp
+++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h"
+#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
new file mode 100644
index 0000000000..fb51b02edf
--- /dev/null
+++ b/src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H
+#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Valhall based OpenCL GEMMNative configuration */
+class CLGEMMNativeKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMNativeKernelConfigurationValhall(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H */
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
new file mode 100644
index 0000000000..21ccf2d647
--- /dev/null
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H
+#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h"
+#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h"
+
+#include "support/MemorySupport.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** CLGEMMReshaped factory class */
+class CLGEMMReshapedKernelConfigurationFactory final
+{
+public:
+ /** Static method to call the CLGEMMReshaped kernel configuration class accordingly with the GPU target
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return CLGEMMReshaped kernel configuration class
+ */
+ static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
+ {
+ switch(get_arch_from_target(gpu))
+ {
+ case GPUTarget::MIDGARD:
+ case GPUTarget::BIFROST:
+ return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationBifrost>(gpu);
+ case GPUTarget::VALHALL:
+ return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationValhall>(gpu);
+ default:
+ ARM_COMPUTE_ERROR("Not supported GPU target");
+ }
+ }
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H */
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
index b5fc074fb4..00c284facc 100644
--- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
@@ -21,15 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h"
+#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
@@ -216,17 +216,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
{
if(workload <= 790.39f)
{
- return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+ return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
}
else
{
if(workload <= 982.39f)
{
- return configure_lhs_rhs_info(m,n,4,2,4,4,4,false,false,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, false, true, false, false);
}
else
{
- return configure_lhs_rhs_info(m,n,2,4,4,2,1,false,true,true,false,false);
+ return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 1, false, true, true, false, false);
}
}
}
@@ -236,16 +236,16 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
{
if(r_mn <= 0.11f)
{
- return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+ return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
}
else
{
- return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
}
}
else
{
- return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+ return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
}
}
}
@@ -257,22 +257,22 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
{
if(m <= 64.5)
{
- return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false);
}
else
{
- return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false);
}
}
else
{
if(r_mn <= 1.09f)
{
- return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
}
else
{
- return configure_lhs_rhs_info(m,n,4,4,4,2,2,true,true,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, true, true, true, false, false);
}
}
}
@@ -280,17 +280,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
{
if(m <= 43)
{
- return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false);
}
else
{
if(workload <= 26364.79f)
{
- return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false);
}
else
{
- return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
}
}
}
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
new file mode 100644
index 0000000000..e3b62ced6a
--- /dev/null
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H
+#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Bifrost based OpenCL GEMMReshaped configuration */
+class CLGEMMReshapedKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H */
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp
index 0c09f5084a..519e903a5a 100644
--- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h"
+#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
new file mode 100644
index 0000000000..5f7e701e0e
--- /dev/null
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H
+#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Valhall based OpenCL GEMMReshaped configuration */
+class CLGEMMReshapedKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H */
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
new file mode 100644
index 0000000000..4efe28ce69
--- /dev/null
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H
+#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
+#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h"
+
+#include "support/MemorySupport.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** CLGEMMReshapedOnlyRHS factory class */
+class CLGEMMReshapedOnlyRHSKernelConfigurationFactory final
+{
+public:
+ /** Static method to call the CLGEMMReshapedOnlyRHS kernel configuration class accordingly with the GPU target
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return CLGEMMReshapedOnlyRHS kernel configuration class
+ */
+ static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
+ {
+ switch(get_arch_from_target(gpu))
+ {
+ case GPUTarget::MIDGARD:
+ case GPUTarget::BIFROST:
+ return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationBifrost>(gpu);
+ case GPUTarget::VALHALL:
+ return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationValhall>(gpu);
+ default:
+ ARM_COMPUTE_ERROR("Not supported GPU target");
+ }
+ }
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H */
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
index 16eabf069c..0a0fc5d152 100644
--- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
@@ -21,15 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
+#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
@@ -122,13 +122,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
if(m == 1)
{
- if ( n <= 2548 )
+ if(n <= 2548)
{
- return configure_lhs_rhs_info(m,n,1,2,16,1,4,false,true,false,true,false);
+ return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false);
}
else
{
- return configure_lhs_rhs_info(m,n,1,4,16,1,8,false,true,false,true,false);
+ return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 8, false, true, false, true, false);
}
}
else
@@ -251,7 +251,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
if(m == 1)
{
- return configure_lhs_rhs_info(m,n,1,2,16,1,32,false,true,false,true,false);
+ return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
}
else
{
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
new file mode 100644
index 0000000000..618dbd9923
--- /dev/null
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H
+#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Bifrost based OpenCL GEMMReshapedOnlyRHS configuration */
+class CLGEMMReshapedOnlyRHSKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H */
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp
index 23bf02c61a..f7939d29c0 100644
--- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp
@@ -21,15 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h"
+#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
new file mode 100644
index 0000000000..b9289923b9
--- /dev/null
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H
+#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H
+
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Valhall based OpenCL GEMMReshapedOnlyRHS configuration */
+class CLGEMMReshapedOnlyRHSKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H */
diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
index 9deb16524e..29745beee7 100644
--- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
+++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
@@ -27,12 +27,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <string>
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 5a9434ee5a..f0e3047796 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -25,14 +25,14 @@
#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/helpers/float_ops.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
index b78ac27cfa..b5a801a97f 100644
--- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
@@ -23,16 +23,17 @@
*/
#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
index dd9c234c56..7a8c9ad0fb 100644
--- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
@@ -25,12 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 1c1df6c4eb..09b668d6cd 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -25,12 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
index c74f7e055b..6eb22c9ada 100644
--- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
@@ -24,9 +24,11 @@
#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
index 44378c8239..53a438dcf6 100644
--- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
@@ -27,9 +27,8 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
index 77c48e6e82..0e2e5d4f3c 100644
--- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
@@ -27,9 +27,8 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
index a15305e3b7..65b17c02bd 100644
--- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
@@ -27,9 +27,8 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
index 95ea3d7df5..b8c0d2f2b8 100644
--- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
+++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
@@ -23,17 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp
index 7916dce241..2f6c09df0b 100644
--- a/src/core/CL/kernels/CLBox3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp
@@ -26,9 +26,8 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <string>
diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
index b8a53650e8..c76ec6769e 100644
--- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp
+++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp
index b0e5111417..d574f352ae 100644
--- a/src/core/CL/kernels/CLChannelCombineKernel.cpp
+++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp
@@ -27,14 +27,12 @@
#include "arm_compute/core/CL/ICLMultiImage.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/MultiImageInfo.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <string>
diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp
index 13ae8f5ef4..7911b948ae 100644
--- a/src/core/CL/kernels/CLChannelExtractKernel.cpp
+++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp
@@ -28,14 +28,13 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/MultiImageInfo.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <string>
diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
index ad000ba17f..301a762850 100644
--- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
@@ -25,12 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 4050b24e0c..3dc007d9e0 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -25,13 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index e14b871ae6..0f82d87348 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -27,14 +27,12 @@
#include "arm_compute/core/CL/ICLMultiImage.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/MultiImageInfo.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <sstream>
diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp
index 5bb1d56690..2b72946f49 100644
--- a/src/core/CL/kernels/CLComparisonKernel.cpp
+++ b/src/core/CL/kernels/CLComparisonKernel.cpp
@@ -24,8 +24,10 @@
#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <map>
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index 7c6114640c..b4e42bf3bc 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -25,10 +25,11 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp
index ca07e68345..48b185f78d 100644
--- a/src/core/CL/kernels/CLConvolutionKernel.cpp
+++ b/src/core/CL/kernels/CLConvolutionKernel.cpp
@@ -31,9 +31,9 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp
index 37c3241302..0b7e9aff53 100644
--- a/src/core/CL/kernels/CLCopyKernel.cpp
+++ b/src/core/CL/kernels/CLCopyKernel.cpp
@@ -23,15 +23,15 @@
*/
#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp
index f828162177..2c99d46929 100644
--- a/src/core/CL/kernels/CLCropKernel.cpp
+++ b/src/core/CL/kernels/CLCropKernel.cpp
@@ -26,16 +26,11 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/helpers/bit_ops.h"
-#include "arm_compute/core/utils/helpers/tensor_transform.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "support/StringSupport.h"
#include <map>
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index e8f12d5d9d..9ba3dc3d8f 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -25,12 +25,11 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
index 69730346fe..1514d906dc 100644
--- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
@@ -30,6 +30,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 87067cf717..cb5d727e9b 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -25,12 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index 11297e7901..24f638f8c4 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -25,13 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
index e6f909e884..50bb3b827c 100644
--- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
@@ -24,9 +24,11 @@
#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index 066e9a5a40..7958230aac 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -23,19 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 0930fee712..5a0d2d0a62 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -23,19 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index a538ab51cb..5a3a0ec435 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -25,17 +25,16 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
index 07f25a80cf..0ff3c520ba 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
@@ -23,19 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 72eac858ad..e653c59550 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -23,15 +23,16 @@
*/
#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp
index ab5f9dab76..659a7cb209 100644
--- a/src/core/CL/kernels/CLDerivativeKernel.cpp
+++ b/src/core/CL/kernels/CLDerivativeKernel.cpp
@@ -26,11 +26,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp
index ae948314a3..1e59c349e7 100644
--- a/src/core/CL/kernels/CLDilateKernel.cpp
+++ b/src/core/CL/kernels/CLDilateKernel.cpp
@@ -26,8 +26,8 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index d5d808a80f..161b221e81 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -23,19 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
index c8c7fb03b8..c7f9df0baa 100644
--- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
+++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
@@ -24,9 +24,10 @@
#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index ec33500f20..f0712f5863 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -24,9 +24,11 @@
#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
#include <map>
diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp
index a5eb79f73b..29a32979a3 100644
--- a/src/core/CL/kernels/CLErodeKernel.cpp
+++ b/src/core/CL/kernels/CLErodeKernel.cpp
@@ -26,8 +26,8 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
index 30bca2f0f9..0478f550f9 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
@@ -24,11 +24,11 @@
#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
index 6c36338dae..7b17a227e1 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
@@ -25,12 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp
index ac5f2b38c3..49fcbb6c7b 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.cpp
+++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp
@@ -24,11 +24,11 @@
#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp
index e71b47228e..ebdfd2741f 100644
--- a/src/core/CL/kernels/CLFastCornersKernel.cpp
+++ b/src/core/CL/kernels/CLFastCornersKernel.cpp
@@ -26,11 +26,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index 1ea654b5cc..e92619a242 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -27,13 +27,11 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
index 6bd1149612..dc1d33869f 100644
--- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp
+++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
@@ -23,9 +23,10 @@
*/
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp
index 09f5f61a50..8884f3fe36 100644
--- a/src/core/CL/kernels/CLFloorKernel.cpp
+++ b/src/core/CL/kernels/CLFloorKernel.cpp
@@ -25,14 +25,14 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
index b582295f44..61e2b2700a 100644
--- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
@@ -25,12 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
index d30a9e5d18..cc98845e0f 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
@@ -23,19 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
@@ -137,7 +136,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
num_elems_processed_per_iteration_x = rhs_info.n0;
num_elems_processed_per_iteration_y = lhs_info.m0;
- win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+ win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
// Collapse along the Z direction
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
index 56b92a3d41..5469c89c30 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
@@ -23,19 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 4770329b7d..4a3ac2da81 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -23,19 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
index 6ef9fd2565..7ab96e5fa9 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
@@ -23,15 +23,13 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
@@ -148,7 +146,8 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I
configure(CLKernelLibrary::get().get_compile_context(), mm_result, vector_sum_col, vector_sum_row, bias, k, a_offset, b_offset);
}
-void CLGEMMLowpOffsetContributionKernel::configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias,
+void CLGEMMLowpOffsetContributionKernel::configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row,
+ const ICLTensor *bias,
int32_t k, int32_t a_offset,
int32_t b_offset)
{
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
index 6d3aa6fbf6..85285d6704 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
@@ -23,16 +23,15 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp
index eae66413a6..ab1b5a2203 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp
@@ -25,14 +25,14 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp
index 430a84cfa0..ad5bac015b 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp
@@ -25,14 +25,14 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
index 79888cdba2..8e4b291dbe 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
@@ -25,12 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
index 31a97ca32b..826b265dbf 100644
--- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
@@ -23,10 +23,12 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index c2dd92c0fd..aa69ed06d1 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -23,20 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/helpers/float_ops.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/float_ops.h"
#include "support/StringSupport.h"
#include <set>
@@ -310,7 +309,8 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, is_interleaved_transposed, reshape_info, fp_mixed_precision, activation_info);
}
-void CLGEMMMatrixMultiplyKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+void CLGEMMMatrixMultiplyKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha,
+ float beta,
bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
index da57aa447f..cea147b10c 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
@@ -23,20 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/helpers/float_ops.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/float_ops.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
index b0f0e8a81f..eaf57086a3 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
@@ -23,23 +23,22 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/helpers/float_ops.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLUtils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/float_ops.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 0ae30ed30e..912c763ed5 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -23,17 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/helpers/float_ops.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLUtils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/float_ops.h"
#include "support/StringSupport.h"
#include <tuple>
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index f52384593b..04aa061e98 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -23,15 +23,14 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp
index 156a657f28..f2ad677976 100644
--- a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp
@@ -23,19 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
index ce294646a0..d94e834d2c 100644
--- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
@@ -23,20 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp
index 57759fc1c1..a8508bed2d 100644
--- a/src/core/CL/kernels/CLGatherKernel.cpp
+++ b/src/core/CL/kernels/CLGatherKernel.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <string>
diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
index 08e7e27b3c..c9ed1ac0d7 100644
--- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
@@ -26,8 +26,8 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <string>
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
index 0e20187d1c..2686e8b32e 100644
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
+++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
index 3108ad87d0..a2fcbbab78 100644
--- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
+++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
@@ -23,17 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
index 7f618b294b..eaf5ea4880 100644
--- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
@@ -27,12 +27,10 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
index fbd2208894..6e14996732 100644
--- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
@@ -27,12 +27,10 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLHOG.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
index 08e670f5d2..19c4e579a0 100644
--- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp
+++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
@@ -23,17 +23,16 @@
*/
#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
index 22b2cfcbc5..1ae2599721 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
@@ -25,13 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp
index b8a4e8619d..a85429c1a0 100644
--- a/src/core/CL/kernels/CLHistogramKernel.cpp
+++ b/src/core/CL/kernels/CLHistogramKernel.cpp
@@ -27,12 +27,10 @@
#include "arm_compute/core/CL/ICLDistribution1D.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstring>
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index c94e313b9a..76490f82f6 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -23,18 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
index 2ad5233de8..e97b856456 100644
--- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
@@ -25,12 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp
index aff4bd9cea..82f6da85a5 100644
--- a/src/core/CL/kernels/CLIntegralImageKernel.cpp
+++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp
@@ -28,9 +28,8 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index a68d8db3c0..9936e29c5f 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -25,13 +25,14 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp
index fae5fe2c8e..0fa2e703ec 100644
--- a/src/core/CL/kernels/CLLKTrackerKernel.cpp
+++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp
@@ -23,16 +23,15 @@
*/
#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cmath>
diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
index 0da0d4ca1f..6e4c45eab7 100644
--- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
@@ -23,15 +23,13 @@
*/
#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
index ef8ebd52e5..dc130d0ff9 100644
--- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
+++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
@@ -27,11 +27,10 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
index 08c74642f4..a78996ddae 100644
--- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
@@ -23,19 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index 33099c928d..5acc3ac3d6 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -25,14 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cmath>
#include <set>
diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
index 5ecbb4b2a6..82a22a9f19 100644
--- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
@@ -25,14 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
index 5f8c9e5a93..4b899502f9 100644
--- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
@@ -26,8 +26,8 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp
index f591c2f6d5..186ed2a38c 100644
--- a/src/core/CL/kernels/CLMemsetKernel.cpp
+++ b/src/core/CL/kernels/CLMemsetKernel.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
index 5f0e48dbb9..bf645f82e9 100644
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
@@ -23,14 +23,15 @@
*/
#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <climits>
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
index 9bbda40782..634b58077a 100644
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
@@ -28,7 +28,8 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <climits>
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
index 16e5113c62..0a8472bf04 100644
--- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
+++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
@@ -27,12 +27,11 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <cmath>
diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
index 958d94ce11..9c6d44b6c5 100644
--- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <string>
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index 7d8e5db2b4..686e6f1b26 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -23,15 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/NormalizationHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
index 00bdac3441..407ce6626b 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
@@ -23,15 +23,16 @@
*/
#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index b2432a058d..45729738fb 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -26,6 +26,8 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp
index dc2d6fe4b4..620665791f 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/CL/kernels/CLPermuteKernel.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index 229937ef31..a7bd4dad60 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -25,11 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 1771834aac..0570887b91 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -23,18 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
index 3429ef75d1..202e9fbb37 100644
--- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
@@ -25,13 +25,14 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
index 2f676d30d1..ff6cc86103 100644
--- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index f6b08884e7..983cbedc0f 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -23,16 +23,16 @@
*/
#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
index 3f2a904f58..ca6c6fad1a 100644
--- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
@@ -23,18 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index c2ed32653a..55fe5a5321 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -23,17 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp
index d46cdd78da..a4c30b63c2 100644
--- a/src/core/CL/kernels/CLRangeKernel.cpp
+++ b/src/core/CL/kernels/CLRangeKernel.cpp
@@ -24,9 +24,11 @@
#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Utils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 0ba63cc4e0..325e4b994c 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -23,17 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp
index fe8c81a3b9..8d3f41b35f 100644
--- a/src/core/CL/kernels/CLRemapKernel.cpp
+++ b/src/core/CL/kernels/CLRemapKernel.cpp
@@ -23,15 +23,14 @@
*/
#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp
index ab81a8fca3..ade7761b91 100644
--- a/src/core/CL/kernels/CLReorgLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp
@@ -26,12 +26,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <string>
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
index 3daf21a9a7..e08970992e 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
@@ -23,18 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include <string>
diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp
index 6546ced72e..f8240984d1 100644
--- a/src/core/CL/kernels/CLReverseKernel.cpp
+++ b/src/core/CL/kernels/CLReverseKernel.cpp
@@ -25,12 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index 2e7ee36bcb..8233f210b4 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -23,16 +23,17 @@
*/
#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include "src/core/utils/ScaleUtils.h"
diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
index 3172966b8f..1e33af3047 100644
--- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
@@ -26,11 +26,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <string>
diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp
index dcac78cca3..d9a1044e1f 100644
--- a/src/core/CL/kernels/CLSelectKernel.cpp
+++ b/src/core/CL/kernels/CLSelectKernel.cpp
@@ -25,12 +25,13 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
index 86dcf22258..89e5207c44 100644
--- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
@@ -26,11 +26,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
index e010fdda75..3e765e47fb 100644
--- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
@@ -26,11 +26,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
index c2b4bec494..37ceaba502 100644
--- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
@@ -26,11 +26,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index c7881b9f5f..5c0acda41a 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -23,18 +23,19 @@
*/
#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
index 3e0ac74f69..c6f70c3c09 100644
--- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
@@ -24,9 +24,11 @@
#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
index 4b6c1be8c2..2d46aade34 100644
--- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
@@ -24,9 +24,11 @@
#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp
index c283c440a3..5055065779 100644
--- a/src/core/CL/kernels/CLStackLayerKernel.cpp
+++ b/src/core/CL/kernels/CLStackLayerKernel.cpp
@@ -25,13 +25,14 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index f7b7290a3f..b632e05d84 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -24,10 +24,12 @@
#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/utils/helpers/bit_ops.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/bit_ops.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
index bba152530c..43c8953363 100644
--- a/src/core/CL/kernels/CLTileKernel.cpp
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/core/CL/kernels/CLTileKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp
index a47d956620..bd910196e9 100644
--- a/src/core/CL/kernels/CLTransposeKernel.cpp
+++ b/src/core/CL/kernels/CLTransposeKernel.cpp
@@ -23,18 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/AccessWindowTranspose.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
#include <sstream>
diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
index 101055001c..a4fc10f26a 100644
--- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
@@ -23,16 +23,18 @@
*/
#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp
index e8da803628..95a7c1b875 100644
--- a/src/core/CL/kernels/CLWarpAffineKernel.cpp
+++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
@@ -32,6 +31,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
index dc7c359849..2fe1feb485 100644
--- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
+++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
@@ -32,6 +31,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstddef>
#include <set>
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index 267957e51a..f69967265a 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -25,6 +25,8 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
index 76100c2a63..27c650894c 100644
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
@@ -23,16 +23,16 @@
*/
#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/helpers/tensor_info.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/tensor_info.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
index 0377eb76b1..5ef2cc46ee 100644
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
@@ -23,17 +23,16 @@
*/
#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/helpers/tensor_info.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/tensor_info.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index d40597fbb5..8b0aaf3227 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -25,13 +25,12 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
index 4a1c48a258..7fab208221 100644
--- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
@@ -23,10 +23,8 @@
*/
#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
@@ -36,6 +34,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
index c4c2b08a81..15c239e849 100644
--- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
@@ -23,10 +23,8 @@
*/
#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
@@ -34,6 +32,10 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
index 19f61b19b3..8ae0255319 100644
--- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
@@ -23,10 +23,8 @@
*/
#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
@@ -36,6 +34,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp
index 132d5d1cd0..0c7588d740 100644
--- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp
+++ b/src/core/CL/kernels/CLYOLOLayerKernel.cpp
@@ -24,17 +24,19 @@
#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/Types.h"
#include "support/StringSupport.h"
namespace arm_compute
diff --git a/src/core/CPP/ICPPSimpleKernel.cpp b/src/core/CPP/ICPPSimpleKernel.cpp
index 126bf548e2..9e4df5ec8a 100644
--- a/src/core/CPP/ICPPSimpleKernel.cpp
+++ b/src/core/CPP/ICPPSimpleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
@@ -71,4 +72,4 @@ Status ICPPSimpleKernel::validate(const ITensorInfo *input, const ITensorInfo *o
return Status{};
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CPP/Validate.h b/src/core/CPP/Validate.h
new file mode 100644
index 0000000000..9e95f72c3f
--- /dev/null
+++ b/src/core/CPP/Validate.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPP_VALIDATE_H
+#define ARM_COMPUTE_CPP_VALIDATE_H
+
+#include "arm_compute/core/Validate.h"
+
+namespace arm_compute
+{
+/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ * @param[in] tensor_info Tensor info to validate.
+ *
+ * @return Status
+ */
+inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line,
+ const ITensorInfo *tensor_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
+#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16,
+ function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above");
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ return Status {};
+}
+
+/** Return an error if the data type of the passed tensor info is BFLOAT16 and BFLOAT16 support is not compiled in.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ * @param[in] tensor_info Tensor info to validate.
+ *
+ * @return Status
+ */
+inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line,
+ const ITensorInfo *tensor_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
+#if !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16))
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::BFLOAT16,
+ function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above");
+#endif /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) */
+ return Status {};
+}
+
+/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ * @param[in] tensor Tensor to validate.
+ *
+ * @return Status
+ */
+inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line,
+ const ITensor *tensor)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info()));
+ return Status{};
+}
+
+/** Return an error if the data type of the passed tensor is BFLOAT16 and BFLOAT16 support is not compiled in.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ * @param[in] tensor Tensor to validate.
+ *
+ * @return Status
+ */
+inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line,
+ const ITensor *tensor)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info()));
+ return Status{};
+}
+
+#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \
+ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor))
+
+#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor))
+
+#define ARM_COMPUTE_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \
+ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor))
+
+#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor))
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPP_VALIDATE_H */
diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
index 917a6ad08b..fb1754247c 100644
--- a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
+++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,8 @@
*/
#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <cmath>
diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
index a0cfb3ba8b..d7af9c9e7a 100644
--- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
+++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,17 +23,9 @@
*/
#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include "support/Mutex.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp b/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp
index ec03b72b6b..3166faba48 100644
--- a/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp
+++ b/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include <algorithm>
diff --git a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
index 89e3058520..c1187ff2b3 100644
--- a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
+++ b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
@@ -23,10 +23,12 @@
*/
#include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
#include <algorithm>
namespace arm_compute
diff --git a/src/core/CPP/kernels/CPPPermuteKernel.cpp b/src/core/CPP/kernels/CPPPermuteKernel.cpp
index 1d1f0cd30e..054c7bf05a 100644
--- a/src/core/CPP/kernels/CPPPermuteKernel.cpp
+++ b/src/core/CPP/kernels/CPPPermuteKernel.cpp
@@ -23,13 +23,10 @@
*/
#include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstddef>
#include <cstdint>
diff --git a/src/core/CPP/kernels/CPPTopKVKernel.cpp b/src/core/CPP/kernels/CPPTopKVKernel.cpp
index 7ba8d7cdd0..d2b54e412e 100644
--- a/src/core/CPP/kernels/CPPTopKVKernel.cpp
+++ b/src/core/CPP/kernels/CPPTopKVKernel.cpp
@@ -22,16 +22,14 @@
* SOFTWARE.
*/
#include "arm_compute/core/CPP/kernels/CPPTopKVKernel.h"
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
+
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
namespace arm_compute
{
namespace
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index ff4ffb6124..7ef83fb2c4 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -23,13 +23,8 @@
*/
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstddef>
#include <cstdint>
diff --git a/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp
index 6609f457e2..fb31ac8377 100644
--- a/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp
+++ b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp
index f0a500398b..5e8accc95d 100644
--- a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp
@@ -32,6 +32,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
index 1c02f41286..0173b81cf8 100644
--- a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
index 06c34863d7..f31c8ca156 100644
--- a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
@@ -34,6 +34,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstddef>
diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
index 3bd34acb92..9281ce5ffb 100644
--- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
@@ -31,6 +30,9 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
index 4fe6484cf8..5781c564ea 100644
--- a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
@@ -24,7 +24,6 @@
#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -42,7 +44,7 @@ GCCol2ImKernel::GCCol2ImKernel()
{
}
-void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output,
+void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output,
std::pair<unsigned int, unsigned int> convolved_dims)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
index 458cb639a3..3256f11e74 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
index cb70dae3ec..95d487b4dd 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -34,6 +33,9 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index 302b21be0d..9ce8acea09 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -44,7 +46,7 @@ GCDirectConvolutionLayerKernel<kernel_size>::GCDirectConvolutionLayerKernel()
}
template <unsigned int kernel_size>
-BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size() const
+BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size() const
{
return _border_size;
}
@@ -70,8 +72,8 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
}
// Get convolved dimensions
- unsigned int owidth = 0;
- unsigned int oheight = 0;
+ unsigned int owidth = 0;
+ unsigned int oheight = 0;
std::tie(owidth, oheight) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info);
TensorShape output_shape = input->info()->tensor_shape();
@@ -238,20 +240,20 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
num_elems_written_per_iteration_x = 4;
#elif defined(PROCESS_4X_2Y_1Z)
options.emplace("#define PROCESS_4X_2Y_1Z");
- num_elems_read_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 2;
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 2;
num_elems_written_per_iteration_x = 4;
num_elems_written_per_iteration_y = 2;
#elif defined(PROCESS_4X_3Y_1Z)
options.emplace("#define PROCESS_4X_3Y_1Z");
- num_elems_read_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 3;
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 3;
num_elems_written_per_iteration_x = 4;
num_elems_written_per_iteration_y = 3;
#elif defined(PROCESS_4X_4Y_1Z)
options.emplace("#define PROCESS_4X_4Y_1Z");
- num_elems_read_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 4;
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 4;
num_elems_written_per_iteration_x = 4;
num_elems_written_per_iteration_y = 4;
#elif defined(PROCESS_4X_2Y_2Z)
diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
index 5c6722af6a..bda6599f86 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
diff --git a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp
index 3b3118bc3d..7ffcdd2f3f 100644
--- a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cstdint>
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
index e0f7e957d8..d395759558 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
@@ -33,6 +33,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
index c9eb4337fa..66fdde5473 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp
index e8298bc327..daad70bba9 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
index dd03faf2df..2f69728b61 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
@@ -23,8 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -37,6 +35,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/AccessWindowTranspose.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
index 4190163694..1d6ef3d0e8 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowTranspose.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
index 64f2d63fec..c12dd38cb4 100644
--- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
@@ -24,7 +24,6 @@
#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -35,6 +34,9 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
@@ -91,7 +93,7 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const
int stride_y = 0;
std::tie(stride_x, stride_y) = conv_info.stride();
- _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height);
+ _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height);
const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4)
&& (std::equal(input->info()->tensor_shape().cbegin() + 3,
@@ -109,9 +111,9 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const
}
build_opts.emplace("#define IM2COL_GENERIC");
- _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
- kernel_dims.width, kernel_dims.height,
- conv_info, dilation);
+ _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
+ kernel_dims.width, kernel_dims.height,
+ conv_info, dilation);
_num_elems_processed_per_iteration = (input->info()->data_type() == DataType::F32) ? 1 : 2;
build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width));
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp
index 5fa1987bf1..c29d9fc4d5 100644
--- a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp
@@ -31,6 +31,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <string>
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
index 6a79990484..971b540a83 100644
--- a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
@@ -31,6 +30,9 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
diff --git a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp
index 45aa06cc2d..76559146ae 100644
--- a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <cmath>
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
index a592c09cc0..13efd10532 100644
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
index cf10b92dd1..a0795c668f 100644
--- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
index f4ed9617fa..39d586da72 100644
--- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
index d06be9b8a6..78b008484e 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -33,6 +32,9 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
index 66b4a55bd8..3bec05b5f1 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
@@ -31,6 +30,9 @@
#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include <set>
diff --git a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp
index 9a430b43cb..bcdbfb60dc 100644
--- a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
diff --git a/src/core/Helpers.cpp b/src/core/Helpers.cpp
index 6701ff4483..e692cc1e7c 100644
--- a/src/core/Helpers.cpp
+++ b/src/core/Helpers.cpp
@@ -25,162 +25,6 @@
namespace arm_compute
{
-Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
-{
- if(!skip_border)
- {
- border_size = BorderSize(0);
- }
-
- const Coordinates &anchor = valid_region.anchor;
- const TensorShape &shape = valid_region.shape;
-
- Window window;
-
- window.set(0, Window::Dimension(
- // Skip the border left of the image
- anchor[0] + border_size.left,
- // Skip the border right of the image
- // Make sure the window width is a multiple of the step size
- anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]),
- steps[0]));
-
- size_t n = 1;
-
- if(anchor.num_dimensions() > 1)
- {
- window.set(1, Window::Dimension(
- // Skip the border above the image
- anchor[1] + border_size.top,
- // Skip the border below the image
- anchor[1] + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]),
- steps[1]));
-
- ++n;
- }
-
- if(anchor.num_dimensions() > 2)
- {
- window.set(2, Window::Dimension(anchor[2], std::max<size_t>(1, shape[2]), steps[2]));
-
- ++n;
- }
-
- for(; n < anchor.num_dimensions(); ++n)
- {
- window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
- }
-
- for(; n < Coordinates::num_max_dimensions; ++n)
- {
- window.set(n, Window::Dimension(0, 1));
- }
-
- return window;
-}
-
-Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size)
-{
- const Coordinates &anchor = valid_region.anchor;
- const TensorShape &shape = valid_region.shape;
-
- Window window;
-
- window.set(0, Window::Dimension(
- // move the anchor to the start from the border
- anchor[0] - border_size.left,
- // move the anchor to include the right end border
- // Make sure the window width is a multiple of the step size
- anchor[0] - border_size.left + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]),
- steps[0]));
-
- size_t n = 1;
-
- if(anchor.num_dimensions() > 1)
- {
- window.set(1, Window::Dimension(
- // Include the border above the image
- anchor[1] - border_size.top,
- // Include the border below the image
- anchor[1] - border_size.top + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]),
- steps[1]));
-
- ++n;
- }
-
- if(anchor.num_dimensions() > 2)
- {
- window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[n]), steps[2]));
-
- ++n;
- }
-
- for(; n < anchor.num_dimensions(); ++n)
- {
- window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
- }
-
- for(; n < Coordinates::num_max_dimensions; ++n)
- {
- window.set(n, Window::Dimension(0, 1));
- }
-
- return window;
-}
-
-Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
-{
- if(skip_border)
- {
- border_size.top = 0;
- border_size.bottom = 0;
- }
- else
- {
- border_size.left = 0;
- border_size.right = 0;
- }
-
- const Coordinates &anchor = valid_region.anchor;
- const TensorShape &shape = valid_region.shape;
-
- Window window;
-
- window.set(0, Window::Dimension(
- // Skip the border left of the image
- anchor[0] + border_size.left,
- // Skip the border right of the image
- // Make sure the window width is a multiple of the step size
- anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]),
- steps[0]));
-
- size_t n = 1;
-
- if(anchor.num_dimensions() > 1)
- {
- window.set(1, Window::Dimension(
- // Skip the border above the image
- anchor[1] - border_size.top,
- // Skip the border below the image
- anchor[1] + shape[1] + border_size.bottom,
- 1));
-
- ++n;
- }
-
- for(; n < anchor.num_dimensions(); ++n)
- {
- window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
- }
-
- for(; n < Coordinates::num_max_dimensions; ++n)
- {
- window.set(n, Window::Dimension(0, 1));
- }
-
- return window;
-}
-
ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape,
InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined)
{
@@ -256,19 +100,4 @@ ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const Tens
return valid_region;
}
-
-PermutationVector get_permutation_vector_from_softmax_axis(size_t actual_axis)
-{
- switch(actual_axis)
- {
- case 1:
- return PermutationVector(1U, 0U, 2U, 3U);
- case 2:
- return PermutationVector(2U, 1U, 0U, 3U);
- case 3:
- return PermutationVector(3U, 1U, 2U, 0U);
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
-}
} // namespace arm_compute \ No newline at end of file
diff --git a/src/core/NEON/NETracePoint.cpp b/src/core/NEON/NETracePoint.cpp
index cb0dc1400a..4a6bffa54e 100644
--- a/src/core/NEON/NETracePoint.cpp
+++ b/src/core/NEON/NETracePoint.cpp
@@ -24,8 +24,8 @@
#include "arm_compute/core/TracePoint.h"
#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
-#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
-#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
+#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
+#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
#include "utils/TypePrinter.h"
#include <memory>
diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp
index 3d4800fe15..acea0af02d 100644
--- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp
+++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp
@@ -30,6 +30,8 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp
index 7c85f698ae..73ef7eb66f 100644
--- a/src/core/NEON/kernels/NEAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index d80aab7069..9616f4faca 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -23,15 +23,17 @@
*/
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NESymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <set>
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index 525e2866f2..7f1a35fb55 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <map>
#include <string>
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index a3da7508ab..49e503fac4 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NESymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp
index c7169d8932..65ac996f46 100644
--- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Window.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 50e46474b5..bda396662f 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -23,14 +23,16 @@
*/
#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h"
#include "src/core/NEON/wrapper/wrapper.h"
diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp
index eb28ce0a8b..e24d7b6c0a 100644
--- a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp
@@ -28,6 +28,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp
index caaa6c22e8..2d49ff825e 100644
--- a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp
+++ b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp
@@ -28,6 +28,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp
index 4da07f93b0..eed9b273ae 100644
--- a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp
+++ b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp
index 591acf50e1..f96117e860 100644
--- a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp
+++ b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp
index b0aec4078f..45d2b0a0db 100644
--- a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp
+++ b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
index 56444dcbc0..5a18e88321 100644
--- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
+++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
index d5d03a9def..1177f6f1dd 100644
--- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,9 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
#include <arm_neon.h>
using namespace arm_compute;
diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
index 0278bb08e1..da33c1b1ea 100644
--- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp
+++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -31,6 +30,11 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.cpp b/src/core/NEON/kernels/NEChannelCombineKernel.cpp
index 0de6c4326a..7bd380831b 100644
--- a/src/core/NEON/kernels/NEChannelCombineKernel.cpp
+++ b/src/core/NEON/kernels/NEChannelCombineKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,6 +33,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.cpp b/src/core/NEON/kernels/NEChannelExtractKernel.cpp
index 800c63606f..86245acd05 100644
--- a/src/core/NEON/kernels/NEChannelExtractKernel.cpp
+++ b/src/core/NEON/kernels/NEChannelExtractKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,6 +34,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
index 88cd0ae514..6d04d71534 100644
--- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -31,6 +30,9 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp
index 6a07defd79..f3192370a6 100644
--- a/src/core/NEON/kernels/NECol2ImKernel.cpp
+++ b/src/core/NEON/kernels/NECol2ImKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp
index bc8c77543a..f933a2a898 100644
--- a/src/core/NEON/kernels/NEColorConvertKernel.cpp
+++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp
@@ -33,6 +33,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/NEON/kernels/detail/NEColorConvertHelper.inl"
diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
index 97bb8ccb8a..8716cfd9b5 100644
--- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp
index f40f1215d3..bd8ea30fb3 100644
--- a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp
+++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp
@@ -30,6 +30,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEConvolutionKernel.cpp b/src/core/NEON/kernels/NEConvolutionKernel.cpp
index 7103fa1618..69b65b2816 100644
--- a/src/core/NEON/kernels/NEConvolutionKernel.cpp
+++ b/src/core/NEON/kernels/NEConvolutionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,6 +32,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NECopyKernel.cpp b/src/core/NEON/kernels/NECopyKernel.cpp
index 3d00139263..b299957b57 100644
--- a/src/core/NEON/kernels/NECopyKernel.cpp
+++ b/src/core/NEON/kernels/NECopyKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp
index 7c65e71727..5fb55d95a9 100644
--- a/src/core/NEON/kernels/NECropKernel.cpp
+++ b/src/core/NEON/kernels/NECropKernel.cpp
@@ -23,17 +23,18 @@
*/
#include "arm_compute/core/NEON/kernels/NECropKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
-
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/helpers/bit_ops.h"
+#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/bit_ops.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp
index cec0e1ce60..5628802783 100644
--- a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp
+++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <cmath>
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
index 6066326fec..b500268477 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
@@ -34,6 +34,8 @@
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index ee23909bd6..259ece7c6f 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -23,17 +23,18 @@
*/
#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/SaturateCast.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
-
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/SaturateCast.h"
using namespace arm_compute;
diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp
index 6465848999..403e7aac9f 100644
--- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp
@@ -29,6 +29,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
index 0a34ee6a07..533b374594 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
@@ -23,13 +23,15 @@
*/
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp"
#include "src/core/NEON/wrapper/traits.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/ToolchainSupport.h"
namespace arm_compute
@@ -48,7 +50,6 @@ constexpr size_t vector_size = 8;
struct DepthwiseConvolutionRunInfo
{
-public:
const size_t num_read_elements_per_iteration;
const uint32_t x_start;
const uint32_t x_end;
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index 9352088b1f..2f3c6f431c 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -23,16 +23,18 @@
*/
#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NESymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEDerivativeKernel.cpp b/src/core/NEON/kernels/NEDerivativeKernel.cpp
index ad590e9f2b..5d3fc01bd2 100644
--- a/src/core/NEON/kernels/NEDerivativeKernel.cpp
+++ b/src/core/NEON/kernels/NEDerivativeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp
index c30dab22c6..cc781c699f 100644
--- a/src/core/NEON/kernels/NEDilateKernel.cpp
+++ b/src/core/NEON/kernels/NEDilateKernel.cpp
@@ -28,6 +28,8 @@
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index c22fa6a2b3..56cd6e62d0 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -26,8 +26,6 @@
#include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h"
#include "src/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
@@ -36,7 +34,11 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
index 8c11574755..abaaf12e92 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
@@ -23,8 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -32,9 +30,13 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index f862d04b22..efe6161096 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <map>
diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
index 40430bdb81..8e4b7eda30 100644
--- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/ToolchainSupport.h"
namespace arm_compute
diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp
index 4b93c3b4d1..31b0f487d6 100644
--- a/src/core/NEON/kernels/NEErodeKernel.cpp
+++ b/src/core/NEON/kernels/NEErodeKernel.cpp
@@ -28,6 +28,8 @@
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp
index d5b20d278d..d8036f2f60 100644
--- a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <set>
diff --git a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp
index de8ba3f484..1b0af488a2 100644
--- a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp
+++ b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp
@@ -28,15 +28,16 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/wrapper/traits.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cmath>
#include <complex>
#include <map>
-#include "src/core/NEON/wrapper/traits.h"
-#include "src/core/NEON/wrapper/wrapper.h"
-
namespace arm_compute
{
namespace
diff --git a/src/core/NEON/kernels/NEFFTScaleKernel.cpp b/src/core/NEON/kernels/NEFFTScaleKernel.cpp
index d99ff953fc..0cb8b84db8 100644
--- a/src/core/NEON/kernels/NEFFTScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEFFTScaleKernel.cpp
@@ -29,6 +29,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp
index 7b1d81e12c..99312f5134 100644
--- a/src/core/NEON/kernels/NEFastCornersKernel.cpp
+++ b/src/core/NEON/kernels/NEFastCornersKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEFillArrayKernel.cpp b/src/core/NEON/kernels/NEFillArrayKernel.cpp
index 6b22dadd08..93798db6c3 100644
--- a/src/core/NEON/kernels/NEFillArrayKernel.cpp
+++ b/src/core/NEON/kernels/NEFillArrayKernel.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index dbaec83d04..c1dd5cf81f 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp
index 35ebc5b70b..e6b34b6165 100644
--- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp
@@ -23,13 +23,15 @@
*/
#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
diff --git a/src/core/NEON/kernels/NEFloorKernel.cpp b/src/core/NEON/kernels/NEFloorKernel.cpp
index 301dc7a422..48f964c6a2 100644
--- a/src/core/NEON/kernels/NEFloorKernel.cpp
+++ b/src/core/NEON/kernels/NEFloorKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEFloorKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/NEON/kernels/floor/impl/list.h"
#include "src/core/common/Registrars.h"
diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
index 00d251f79e..e353df1c39 100644
--- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
@@ -23,14 +23,16 @@
*/
#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <map>
diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
index 8b4ad0da23..2997c1d003 100644
--- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
@@ -31,6 +31,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
index f3ba2901cb..acc519012b 100644
--- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
@@ -31,6 +31,10 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
#include <arm_neon.h>
using namespace arm_compute;
@@ -1052,5 +1056,3 @@ void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window, const ThreadInfo
}
}
} // namespace arm_compute
-
-
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
index 4ac33d1e29..1c76926546 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -32,6 +31,9 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
index 8d0d7c26a3..6a7d225167 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -32,8 +31,11 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp
index 023b798b9a..659c4105c1 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -32,7 +31,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
index 68f16c5fc7..afa8cec76f 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -33,7 +32,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/NEON/NESymm.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
index 2ef32c4e81..83416e03e9 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -33,7 +32,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/NEON/NEAsymm.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
index 8fc33dcc82..1e8aa0cc0a 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -33,7 +32,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/NEON/NEAsymm.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp
index 1494cd459c..566872f02c 100644
--- a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp
@@ -23,11 +23,13 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index bd931469a3..9aee26ca55 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index 6f74e3fc06..a9236890e3 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -32,9 +31,13 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/helpers/float_ops.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/float_ops.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
index a8adc45645..b9b4fe9e9c 100644
--- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp
index 906e8a053e..193fe98c7b 100644
--- a/src/core/NEON/kernels/NEGatherKernel.cpp
+++ b/src/core/NEON/kernels/NEGatherKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEGatherKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
@@ -32,6 +31,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp
index 18dd80e283..5ff5db7266 100644
--- a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp
index 99b5d4b093..5bb3e76ded 100644
--- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp
+++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp
index 83d2877836..62cf414df2 100644
--- a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp
+++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,6 +32,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
index c3b105919b..483f204b04 100644
--- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp
index 84bb59ef0e..00f4087cbc 100644
--- a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp
+++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp
index eb0d45000a..d5dfa4195d 100644
--- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp
+++ b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
index 340c694a7c..be68b9c44b 100644
--- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
+++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,8 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
index fc7b819f6a..a50712598a 100644
--- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
@@ -33,6 +33,8 @@
#include "arm_compute/core/Window.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp
index 0f8397f117..12d1bb8e7e 100644
--- a/src/core/NEON/kernels/NEHistogramKernel.cpp
+++ b/src/core/NEON/kernels/NEHistogramKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 6eae0541aa..915ea75431 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -31,6 +30,9 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
index 78acbc399d..7aa23de6eb 100644
--- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -32,8 +31,11 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.cpp b/src/core/NEON/kernels/NEIntegralImageKernel.cpp
index 58ee3b4bea..5fc6ca65e3 100644
--- a/src/core/NEON/kernels/NEIntegralImageKernel.cpp
+++ b/src/core/NEON/kernels/NEIntegralImageKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
index d99def53ba..a216981f0f 100644
--- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
@@ -31,6 +31,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/NEON/NEMath.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/NEON/wrapper/wrapper.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NELKTrackerKernel.cpp b/src/core/NEON/kernels/NELKTrackerKernel.cpp
index 533c241b9b..6567a8d206 100644
--- a/src/core/NEON/kernels/NELKTrackerKernel.cpp
+++ b/src/core/NEON/kernels/NELKTrackerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
@@ -31,6 +30,9 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cmath>
diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
index 9eafe18020..b8e6a6d763 100644
--- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
@@ -33,7 +32,10 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
index a0c1dbc668..8d82e1abd6 100644
--- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
+++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp
index 821bf53817..87caf00477 100644
--- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp
@@ -23,11 +23,13 @@
*/
#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/ToolchainSupport.h"
diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
index 914a21c0a0..c4e036a8b9 100644
--- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
+++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cmath>
diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
index bcce843638..8ee9ff6f40 100644
--- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
@@ -23,14 +23,16 @@
*/
#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp
index 72225a4f43..86fcc30e91 100644
--- a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <utility>
diff --git a/src/core/NEON/kernels/NEMemsetKernel.cpp b/src/core/NEON/kernels/NEMemsetKernel.cpp
index 3870fa57f0..fd427cc8c5 100644
--- a/src/core/NEON/kernels/NEMemsetKernel.cpp
+++ b/src/core/NEON/kernels/NEMemsetKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,14 @@
*/
#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
index b1c2b1c376..f675c391ed 100644
--- a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,6 +33,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp
index e956f9a8d0..e1691dc8ff 100644
--- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp
+++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,6 +33,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/Utility.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp
index f20e869272..31919ead03 100644
--- a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp
+++ b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
index 3e4c6e29d3..9566ced768 100644
--- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index 7b888266fb..1b72a3e277 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -23,17 +23,20 @@
*/
#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/NormalizationHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp
index 1b52117bbe..ca9c5419e0 100644
--- a/src/core/NEON/kernels/NEPadLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp
@@ -31,6 +31,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp
index 3f447f90b9..eab11ebfff 100644
--- a/src/core/NEON/kernels/NEPermuteKernel.cpp
+++ b/src/core/NEON/kernels/NEPermuteKernel.cpp
@@ -30,10 +30,12 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace
{
-#include "arm_compute/core/NEON/kernels/convolution/common/shims.hpp"
+#include "src/core/NEON/kernels/convolution/common/shims.hpp"
} // namespace
namespace arm_compute
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index c5320b9dbf..0847cb1f23 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -23,11 +23,13 @@
*/
#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/TensorInfo.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NESymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 397eae94ea..f9636dcb8d 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -23,8 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -33,9 +31,13 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/ToolchainSupport.h"
#include "src/core/NEON/wrapper/wrapper.h"
diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp
index 808b68a0d7..06a1f14e5f 100644
--- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp
@@ -27,6 +27,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp
index 6a038f8f44..55585b4e00 100644
--- a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp
@@ -23,16 +23,18 @@
*/
#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/NESymm.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h"
diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
index 6d5202d6b5..990e4b67bc 100644
--- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
@@ -31,8 +31,10 @@
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include <arm_neon.h>
#include <map>
diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp
index 955cdc2074..79f7888eba 100644
--- a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp
@@ -23,14 +23,16 @@
*/
#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/misc/Utility.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp
index 6a960c74dc..a3171d9aa6 100644
--- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp
@@ -23,10 +23,12 @@
*/
#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/ToolchainSupport.h"
#include <cfloat>
diff --git a/src/core/NEON/kernels/NERangeKernel.cpp b/src/core/NEON/kernels/NERangeKernel.cpp
index 7d8fbb1ec1..3466794b11 100644
--- a/src/core/NEON/kernels/NERangeKernel.cpp
+++ b/src/core/NEON/kernels/NERangeKernel.cpp
@@ -31,6 +31,8 @@
#include "arm_compute/core/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "arm_compute/core/Utils.h"
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 9af7f2ab10..716b092396 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
@@ -32,9 +31,12 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/SaturateCast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEMath.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/SaturateCast.h"
#include "src/core/NEON/wrapper/wrapper.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp
index 2881161d7f..f698439507 100644
--- a/src/core/NEON/kernels/NERemapKernel.cpp
+++ b/src/core/NEON/kernels/NERemapKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,13 +23,16 @@
*/
#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/ScaleHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
@@ -175,6 +178,8 @@ void NERemapKernel::remap_nearest(const Window &window)
void NERemapKernel::remap_bilinear(const Window &window)
{
+ using namespace scale_helpers;
+
// Don't increment in X and Y direction for the input tensor
// A pointer to the start of this plane is needed as base for the precomputed offsets
Window win_in(window);
diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp
index 317bc25967..1c48a5c93d 100644
--- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstddef>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp
index 23b349b443..7946812811 100644
--- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp
@@ -23,8 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
@@ -33,6 +31,10 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp
index 0c44a7e0c9..2c081cb917 100644
--- a/src/core/NEON/kernels/NEReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEReverseKernel.cpp
@@ -27,6 +27,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 94f5a18102..e07fcad0ab 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -23,15 +23,17 @@
*/
#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Rounding.h"
#include "arm_compute/core/utils/misc/Utility.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
-
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/ScaleHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/ScaleUtils.h"
+#include "support/Rounding.h"
#include <arm_neon.h>
#include <map>
@@ -336,6 +338,8 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
void NEScaleKernel::scale_area_nchw_u8(const Window &window)
{
+ using namespace scale_helpers;
+
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8);
// Don't increment in width/height/channels for the input tensor
diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp
index dcc9362cf0..eb1dc65c0f 100644
--- a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp
index 286b8a63c8..2f36db2ddb 100644
--- a/src/core/NEON/kernels/NESelectKernel.cpp
+++ b/src/core/NEON/kernels/NESelectKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NESelectKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
@@ -31,7 +30,10 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "utils/TypePrinter.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.cpp b/src/core/NEON/kernels/NESobel3x3Kernel.cpp
index eb9d3c3020..1c7089b641 100644
--- a/src/core/NEON/kernels/NESobel3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NESobel3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.cpp b/src/core/NEON/kernels/NESobel5x5Kernel.cpp
index fc8ccc803d..2421ea72ad 100644
--- a/src/core/NEON/kernels/NESobel5x5Kernel.cpp
+++ b/src/core/NEON/kernels/NESobel5x5Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstddef>
diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.cpp b/src/core/NEON/kernels/NESobel7x7Kernel.cpp
index 95ab12b6cd..779d67a044 100644
--- a/src/core/NEON/kernels/NESobel7x7Kernel.cpp
+++ b/src/core/NEON/kernels/NESobel7x7Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index e71818f213..13f0a54275 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -23,8 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -32,10 +30,14 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/SaturateCast.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/SaturateCast.h"
#include <algorithm>
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp
index ccad92a685..3293466979 100644
--- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp
@@ -29,6 +29,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp
index 2667611d2c..7c9cc4996b 100644
--- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp
@@ -29,6 +29,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
#include <arm_neon.h>
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp
index 1d44be60a0..ad7f1b1300 100644
--- a/src/core/NEON/kernels/NEStackLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp
@@ -33,6 +33,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp
index 243a60f249..13b2cb5a10 100644
--- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp
+++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp
@@ -23,16 +23,17 @@
*/
#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
-
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/helpers/bit_ops.h"
+#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/bit_ops.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp
index 9e8ec5c106..aad440b120 100644
--- a/src/core/NEON/kernels/NEThresholdKernel.cpp
+++ b/src/core/NEON/kernels/NEThresholdKernel.cpp
@@ -27,6 +27,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/NEON/wrapper/wrapper.h"
diff --git a/src/core/NEON/kernels/NETileKernel.cpp b/src/core/NEON/kernels/NETileKernel.cpp
index cc7655a479..99651c8b8a 100644
--- a/src/core/NEON/kernels/NETileKernel.cpp
+++ b/src/core/NEON/kernels/NETileKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp
index 7118e45f1e..6037810a44 100644
--- a/src/core/NEON/kernels/NETransposeKernel.cpp
+++ b/src/core/NEON/kernels/NETransposeKernel.cpp
@@ -23,14 +23,16 @@
*/
#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/AccessWindowTranspose.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp
index 69324c1693..129c83c695 100644
--- a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -31,7 +30,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp
index d8191dce53..891304f02c 100644
--- a/src/core/NEON/kernels/NEWarpKernel.cpp
+++ b/src/core/NEON/kernels/NEWarpKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
@@ -31,6 +30,10 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/ScaleHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstddef>
@@ -184,7 +187,7 @@ void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
break;
case InterpolationPolicy::BILINEAR:
- *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0);
+ *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
break;
default:
ARM_COMPUTE_ERROR("Interpolation not supported");
@@ -271,7 +274,7 @@ void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
break;
case InterpolationPolicy::BILINEAR:
- *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0);
+ *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
break;
default:
ARM_COMPUTE_ERROR("Interpolation not supported");
@@ -386,7 +389,7 @@ void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
break;
case InterpolationPolicy::BILINEAR:
- *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0);
+ *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
break;
default:
ARM_COMPUTE_ERROR("Interpolation not supported");
@@ -519,7 +522,7 @@ void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window
*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
break;
case InterpolationPolicy::BILINEAR:
- *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn);
+ *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
break;
default:
ARM_COMPUTE_ERROR("Interpolation not supported");
@@ -620,7 +623,7 @@ void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
break;
case InterpolationPolicy::BILINEAR:
- *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn);
+ *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
break;
default:
ARM_COMPUTE_ERROR("Interpolation not supported");
@@ -752,7 +755,7 @@ void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window
*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
break;
case InterpolationPolicy::BILINEAR:
- *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn);
+ *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
break;
default:
ARM_COMPUTE_ERROR("Interpolation not supported");
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
index 6a74914ff7..c7fa2d2365 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
@@ -25,6 +25,8 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
index d12b10c69e..90afbd6a19 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
@@ -33,6 +33,8 @@
#include "arm_compute/core/Window.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <cstdint>
diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
index bfe97bfbdb..211ebdec90 100644
--- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
@@ -23,16 +23,18 @@
*/
#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/NEON/kernels/convolution/common/utils.hpp"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/MemorySupport.h"
#include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"
diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
index 94df4f6952..bf5d77fc43 100644
--- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
+++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
@@ -25,8 +25,8 @@
#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
-#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
+#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
+#include "src/core/NEON/kernels/convolution/common/tensor.hpp"
#include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"
diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
index 591aa1e5e6..48c0616b35 100644
--- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
@@ -23,16 +23,18 @@
*/
#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h"
diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
index b071be3749..760274dba1 100644
--- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
+++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
+#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
new file mode 100644
index 0000000000..030f1aad12
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_INEGEMMWRAPPERKERNEL_H
+#define SRC_INEGEMMWRAPPERKERNEL_H
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Common interface for all the arm_gemm Gemms
+ */
+class INEGEMMWrapperKernel : public INEKernel
+{
+public:
+ /** Parameters defining the dimensions of the matrices being multiplied */
+ struct Params
+ {
+ unsigned int M{ 0 }; /**< Rows in output matrix C (and input matrix A). */
+ unsigned int N{ 0 }; /**< Columns in output matrix C (and input matrix B). */
+ unsigned int K{ 0 }; /**< Columns of input matrix A (= rows of input matrix B). */
+ unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */
+ unsigned int multis{ 0 }; /**< Number of "multi" GEMMs (unique A, B and C). */
+ };
+
+ static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info);
+
+ /** Constructor */
+ INEGEMMWrapperKernel();
+ /** Prevent instances of this class from being copied */
+ INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete;
+ /** Prevent instances of this class from being copied */
+ INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note The input and output tensor must have the same dimensions
+ *
+ * @param[in] a Input tensor (Matrix A)
+ * @param[in] b Input tensor (Matrix B)
+ * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
+ * @param[in] alpha Scalar multiplier to apply to AB matrix product.
+ * @param[in] beta Scalar multiplier to apply to input C matrix before adding product.
+ * @param[in] gemm_info GEMM meta-data
+ */
+ void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+protected:
+ /** Called as part of configure() after _a, _b, _c and _params have been set.
+ *
+ * @param[in] alpha Scalar multiplier to apply to AB matrix product.
+ * @param[in] beta Scalar multiplier to apply to input C matrix before adding product.
+ *
+ * @return A 3D execution window.
+ */
+ virtual Window configure_internal(float alpha, float beta) = 0;
+
+ /** Run the kernel from the start to the end offset in window.
+ *
+ * @param[in] window Window to use for the iteration
+ * @param[in] start_offset Where to start iterating from (In Window coordinates)
+ * @param[in] end_offset Where to stop iterating (In Window coordinates).
+ * @param[in] info Info about executing thread and CPU.
+ */
+ virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0;
+
+ const ITensor *_a;
+ const ITensor *_b;
+ ITensor *_c;
+ Params _params;
+ GEMMInfo _gemm_info;
+
+private:
+ Window _window3d;
+ TensorShape _window_shape;
+};
+
+} // namespace arm_compute
+
+#endif /* SRC_INEGEMMRAPPERKERNEL_H */
diff --git a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
new file mode 100644
index 0000000000..a2f7e3bd59
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
+#define SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/NEON/kernels/convolution/depthwise/depthwise.hpp"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** This class is a wrapper for the depthwise convolution assembly kernels. */
+class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDepthwiseConvolutionAssemblyKernelWrapper";
+ }
+
+ /** Default constructor */
+ NEDepthwiseConvolutionAssemblyKernelWrapper()
+ : _kernel(nullptr)
+ {
+ }
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
+ /** Default Move Constructor. */
+ NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
+ /** Default move assignment operator */
+ NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] kernel Pointer to an assembly kernel implementation.
+ */
+ void configure(depthwise::IDepthwiseConvolution *kernel)
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel)));
+ _kernel = kernel;
+ Window win;
+ win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1));
+ INEKernel::configure(win);
+ }
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ auto first = window.x().start();
+ auto last = window.x().end();
+ _kernel->run(first, last, info.thread_id);
+ }
+
+private:
+ depthwise::IDepthwiseConvolution *_kernel;
+};
+} // namespace arm_compute
+#endif /* SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */
diff --git a/src/core/NEON/kernels/assembly/arm_gemm_local.hpp b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp
new file mode 100644
index 0000000000..4715f2500a
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#pragma once
+
+/* This file is used to configure integration-specific aspects of arm_gemm into ACL */
+
+#include "arm_compute/core/CPP/CPPTypes.h"
+
+namespace arm_gemm
+{
+using CPUModel = arm_compute::CPUModel;
+using CPUInfo = arm_compute::CPUInfo;
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/convolution/common/activation.hpp b/src/core/NEON/kernels/convolution/common/activation.hpp
new file mode 100644
index 0000000000..0c9b7c1368
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/activation.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+namespace neon_convolution_kernels
+{
+
+enum class ActivationFunction
+{
+ None,
+ ReLU,
+ ReLU6,
+};
+
+}
diff --git a/src/core/NEON/kernels/convolution/common/alloc.hpp b/src/core/NEON/kernels/convolution/common/alloc.hpp
new file mode 100644
index 0000000000..7be3cdaaf5
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/alloc.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#ifdef ALLOC_ALIGN
+#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x)
+#else
+#define ALLOCATE(x) malloc(x)
+#endif
diff --git a/src/core/NEON/kernels/convolution/common/arm.hpp b/src/core/NEON/kernels/convolution/common/arm.hpp
new file mode 100644
index 0000000000..b19bf98252
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/arm.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2017 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/** Sets the macro __arm_any__ if compiling for Aarch32 or Aarch64.
+ * Includes `arm_neon.h` if compiling for either architecture.
+ */
+
+#ifdef __arm__
+#define __arm_any__
+#endif // __arm__
+
+#ifdef __aarch64__
+#define __arm_any__
+#endif // __aarch64__
+
+#ifdef __arm_any__
+#include <arm_neon.h>
+#endif // __arm_any__
diff --git a/src/core/NEON/kernels/convolution/common/convolution.hpp b/src/core/NEON/kernels/convolution/common/convolution.hpp
new file mode 100644
index 0000000000..b1413527c3
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/convolution.hpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+enum PaddingType {
+ PADDING_SAME, PADDING_VALID
+};
diff --git a/src/core/NEON/kernels/convolution/common/padding.hpp b/src/core/NEON/kernels/convolution/common/padding.hpp
new file mode 100644
index 0000000000..b6f95872c0
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/padding.hpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+// Utilities for copying tensor tiles and adding/removing padding.
+namespace padding
+{
+
+/* Copy a tile and apply padding to the output copy.
+ */
+template <typename T>
+void copy_and_pad_tile(
+ unsigned int tile_rows,
+ unsigned int tile_cols,
+ unsigned int n_channels,
+ const T *inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ T* outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride,
+ unsigned int pad_top,
+ unsigned int pad_left,
+ unsigned int pad_bottom,
+ unsigned int pad_right,
+ T pad_value=static_cast<T>(0)
+);
+
+/** Copy a tile and remove padding elements in the output.
+ */
+template <unsigned int TileRows, unsigned int TileCols>
+class CopyCropped
+{
+ public:
+ static void execute(
+ size_t size, // Amount of data to copy
+ const void *inptr,
+ size_t in_row_stride,
+ size_t in_col_stride,
+ void *outptr,
+ size_t out_row_stride,
+ size_t out_col_stride,
+ unsigned int pad_top,
+ unsigned int pad_left,
+ unsigned int pad_bottom,
+ unsigned int pad_right
+ );
+};
+
+template <typename T>
+void crop_and_copy_tile(
+ unsigned int tile_rows,
+ unsigned int tile_cols,
+ unsigned int n_channels,
+ const T *inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ T *outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride,
+ unsigned int crop_top,
+ unsigned int crop_left,
+ unsigned int crop_bottom,
+ unsigned int crop_right
+);
+
+}
diff --git a/src/core/NEON/kernels/convolution/common/perf.h b/src/core/NEON/kernels/convolution/common/perf.h
new file mode 100644
index 0000000000..fbae4dcdfa
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/perf.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#pragma once
+
+/* Prototypes from perf.c */
+
+void start_counter(int fd);
+long long get_counter(int fd);
+long long stop_counter(int fd);
+int open_instruction_counter(void);
+int open_cycle_counter(void);
diff --git a/src/core/NEON/kernels/convolution/common/qasymm8.hpp b/src/core/NEON/kernels/convolution/common/qasymm8.hpp
new file mode 100644
index 0000000000..88ef7327c0
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/qasymm8.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+#include <cstdint>
+
+namespace qasymm8
+{
+
+struct QAsymm8Params
+{
+ uint8_t quantize(float value) const;
+ float dequantize(uint8_t value) const;
+
+ uint8_t offset;
+ float scale;
+};
+
+struct QAsymm8RescaleParams
+{
+ static QAsymm8RescaleParams make_rescale_params(
+ const QAsymm8Params& weight_quant,
+ const QAsymm8Params& input_quant,
+ const QAsymm8Params& output_quant
+ );
+
+ QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale);
+
+ const int32_t shift, multiplier;
+ const float rescale;
+};
+
+}
diff --git a/src/core/NEON/kernels/convolution/common/qsymm8.hpp b/src/core/NEON/kernels/convolution/common/qsymm8.hpp
new file mode 100644
index 0000000000..726a02ccfd
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/qsymm8.hpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+#include <cstdint>
+#include <vector>
+#include "qasymm8.hpp"
+
+
+namespace qsymm8 {
+
+struct QSymm8Params {
+ int8_t quantize(float value) const;
+ float dequantize(int8_t value) const;
+
+ float scale;
+};
+
+struct QSymm8RescaleParams {
+ static QSymm8RescaleParams
+ make_rescale_params(const QSymm8Params &weight_quant,
+ const QSymm8Params &input_quant,
+ const QSymm8Params &output_quant);
+
+ QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale);
+
+ const int32_t shift, multiplier;
+ const float rescale;
+};
+
+struct QSymm8PerChannelParams {
+ int8_t quantize(float value, float scale) const;
+ float dequantize(int8_t value, float scale) const;
+
+ std::vector<float> scales;
+};
+
+struct QSymm8PerChannelRescaleParams {
+ static QSymm8PerChannelRescaleParams
+ make_rescale_params(const QSymm8PerChannelParams &weight_quant,
+ const QSymm8PerChannelParams &input_quant,
+ const QSymm8PerChannelParams &output_quant);
+
+ static QSymm8PerChannelRescaleParams
+ make_rescale_params(const QSymm8PerChannelParams &weight_quant,
+ const qasymm8::QAsymm8Params &input_quant,
+ const qasymm8::QAsymm8Params &output_quant);
+
+ QSymm8PerChannelRescaleParams(std::vector<int32_t>& shift, std::vector<int32_t>& multiplier, std::vector<float>& rescale);
+
+ std::vector<int32_t> shifts, multipliers;
+ std::vector<float> rescales;
+};
+
+} // namespace qsymm8
diff --git a/src/core/NEON/kernels/convolution/common/shims.hpp b/src/core/NEON/kernels/convolution/common/shims.hpp
new file mode 100644
index 0000000000..310bd47b82
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/shims.hpp
@@ -0,0 +1,749 @@
+/*
+ * Copyright (c) 2017 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+#ifndef DOXYGEN_SKIP_THIS
+#include <cstdint>
+#endif /* DOXYGEN_SKIP_THIS */
+#include "arm.hpp"
+
+namespace reorder {
+/** Re-order a tensor from NCHW format to NHWC.
+ *
+ * @note The stride parameters are optional and are provided to allow padding in either input or output tensors.
+ *
+ * @param[in] in Input tensor in NCHW format.
+ * @param[out] out Output tensor, to be written in NHWC format.
+ * @param n_batches Number of batches in the tensors.
+ * @param n_channels Number of channels in the tensors
+ * @param n_rows Height of the tensor
+ * @param n_cols Width of the tensor
+ * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`.
+ * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`.
+ * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`.
+ * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`.
+ * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`.
+ * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`.
+ */
+template <typename T>
+inline void nchw_to_nhwc(
+ const T* const in,
+ T* const out,
+ const int n_batches,
+ const int n_channels,
+ const int n_rows,
+ const int n_cols,
+ int in_batch_stride=0,
+ int in_channel_stride=0,
+ int in_row_stride=0,
+ int out_batch_stride=0,
+ int out_row_stride=0,
+ int out_col_stride=0
+);
+
+/** Re-order a tensor from NHWC format to NCHW.
+ *
+ * @note The stride parameters are optional and are provided to allow padding in either input or output tensors.
+ *
+ * @param[in] in Input tensor in NHWC format.
+ * @param[out] out Output tensor, to be written in NCHW format.
+ * @param n_batches Number of batches in the tensors.
+ * @param n_rows Height of the tensor
+ * @param n_cols Width of the tensor
+ * @param n_channels Number of channels in the tensors
+ * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`.
+ * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`.
+ * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`.
+ * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`.
+ * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`.
+ * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`.
+ */
+template <typename T>
+inline void nhwc_to_nchw(
+ const T* const in, // Input data in NHWC form
+ T* const out, // Output data in NCHW form
+ const int n_batches,
+ const int n_rows,
+ const int n_cols,
+ const int n_channels,
+ int in_batch_stride=0,
+ int in_row_stride=0,
+ int in_col_stride=0,
+ int out_batch_stride=0,
+ int out_channel_stride=0,
+ int out_row_stride=0
+);
+
+/** Re-order a weight tensor from [Output feature map x Input feature map x
+ * Height x Width] format to [Height x Width x Input feature map x Output
+ * feature map] format.
+ */
+template <typename T>
+inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
+ const T* const in, // Input in [Output x Input x Height x Width] form
+ T* const out, // Output in [Height x Width x Input x Output] form
+ const int n_output_feature_maps,
+ const int n_input_feature_maps,
+ const int n_rows,
+ const int n_cols,
+ int in_output_feature_map_stride=0,
+ int in_input_feature_map_stride=0,
+ int in_row_stride=0,
+ int out_row_stride=0,
+ int out_col_stride=0,
+ int out_input_feature_map_stride=0
+);
+
+/** Re-order a weight tensor from [Height x Width x Input feature map x Output
+ * feature map] format to [Output feature map x Input feature map x Height x
+ * Width] format.
+ */
+template <typename T>
+inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
+ const T* const in, // Input in [Height x Width x Input x Output] form
+ T* const out, // Output in [Output x Input x Height x Width] form
+ const int n_rows,
+ const int n_cols,
+ const int n_input_feature_maps,
+ const int n_output_feature_maps,
+ int in_row_stride=0,
+ int in_col_stride=0,
+ int in_input_feature_map_stride=0,
+ int out_output_feature_map_stride=0,
+ int out_input_feature_map_stride=0,
+ int out_row_stride=0
+);
+
+/*****************************************************************************/
+/* 32-bit implementation : NCHW -> NHWC
+ */
+template <>
+inline void nchw_to_nhwc(
+ const int32_t* const in,
+ int32_t* const out,
+ const int n_batches,
+ const int n_channels,
+ const int n_rows,
+ const int n_cols,
+ int in_batch_stride,
+ int in_channel_stride,
+ int in_row_stride,
+ int out_batch_stride,
+ int out_row_stride,
+ int out_col_stride
+)
+{
+ typedef int32_t T;
+
+ // Fill in the stride values
+ in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
+ in_channel_stride = (in_channel_stride) ? in_channel_stride
+ : n_rows * in_row_stride;
+ in_batch_stride = (in_batch_stride) ? in_batch_stride
+ : n_channels * in_channel_stride;
+
+ out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
+ out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
+ out_batch_stride = (out_batch_stride) ? out_batch_stride
+ : n_rows * out_row_stride;
+
+ // Perform the re-ordering
+ for (int n = 0; n < n_batches; n++)
+ {
+ const T* const in_batch = in + n*in_batch_stride;
+ T* const out_batch = out + n*out_batch_stride;
+
+ for (int i = 0; i < n_rows; i++)
+ {
+ const T* const in_row = in_batch + i*in_row_stride;
+ T* const out_row = out_batch + i*out_row_stride;
+
+ int j = 0, j_remaining = n_cols;
+#ifdef __arm_any__
+ for (; j_remaining >= 4; j += 4, j_remaining -= 4)
+ {
+ int c = 0, c_remaining = n_channels;
+ for (; c_remaining >= 4; c += 4, c_remaining -= 4)
+ {
+ // Read 4 channels worth of 4 columns, then zip to produce 4 columns
+ // worth of 4 channels.
+ int32x4_t channel_pixels[4];
+ channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j);
+ channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j);
+ channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j);
+ channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j);
+
+ const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]);
+ const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]);
+ const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]);
+ const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]);
+
+ vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]);
+ vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]);
+ vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]);
+ vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]);
+ }
+ for (; c_remaining; c++, c_remaining--)
+ {
+ for (int _j = 0; _j < 4; _j++)
+ {
+ const T* const in_col = in_row + j + _j;
+ T* const out_col = out_row + (j + _j)*out_col_stride;
+ const T* const in_channel = in_col + c*in_channel_stride;
+ out_col[c] = *(in_channel);
+ }
+ }
+ }
+ for (; j_remaining >= 2; j += 2, j_remaining -= 2)
+ {
+ int c = 0, c_remaining = n_channels;
+ for (; c_remaining >= 2; c += 2, c_remaining -= 2)
+ {
+ // Read 2 channels worth of 2 columns, then zip to produce 2 columns
+ // worth of 2 channels.
+ int32x2_t channel_pixels[2];
+ channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j);
+ channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j);
+
+ const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]);
+
+ vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]);
+ vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]);
+ }
+ for (; c_remaining; c++, c_remaining--)
+ {
+ for (int _j = 0; _j < 2; _j++)
+ {
+ const T* const in_col = in_row + j + _j;
+ T* const out_col = out_row + (j + _j)*out_col_stride;
+ const T* const in_channel = in_col + c*in_channel_stride;
+ out_col[c] = *(in_channel);
+ }
+ }
+ }
+#endif // __arm_any__
+ for (; j_remaining; j++, j_remaining--)
+ {
+ const T* const in_col = in_row + j;
+ T* const out_col = out_row + j*out_col_stride;
+
+ for (int c = 0; c < n_channels; c++)
+ {
+ const T* const in_channel = in_col + c*in_channel_stride;
+ out_col[c] = *(in_channel);
+ }
+ }
+ }
+ }
+}
+
+template <>
+inline void nchw_to_nhwc(
+ const uint32_t* const in,
+ uint32_t* const out,
+ const int n_batches,
+ const int n_channels,
+ const int n_rows,
+ const int n_cols,
+ int in_batch_stride,
+ int in_channel_stride,
+ int in_row_stride,
+ int out_batch_stride,
+ int out_row_stride,
+ int out_col_stride
+)
+{
+ nchw_to_nhwc(
+ reinterpret_cast<const int32_t*>(in),
+ reinterpret_cast<int32_t*>(out),
+ n_batches, n_channels, n_rows, n_cols,
+ in_batch_stride, in_channel_stride, in_row_stride,
+ out_batch_stride, out_row_stride, out_col_stride
+ );
+}
+
+template <>
+inline void nchw_to_nhwc(
+ const float* const in,
+ float* const out,
+ const int n_batches,
+ const int n_channels,
+ const int n_rows,
+ const int n_cols,
+ int in_batch_stride,
+ int in_channel_stride,
+ int in_row_stride,
+ int out_batch_stride,
+ int out_row_stride,
+ int out_col_stride
+)
+{
+ nchw_to_nhwc(
+ reinterpret_cast<const int32_t*>(in),
+ reinterpret_cast<int32_t*>(out),
+ n_batches, n_channels, n_rows, n_cols,
+ in_batch_stride, in_channel_stride, in_row_stride,
+ out_batch_stride, out_row_stride, out_col_stride
+ );
+}
+
+/*****************************************************************************/
+/* Generic implementation : NCHW -> NHWC
+ */
+template <typename T>
+inline void nchw_to_nhwc(
+ const T* const in,
+ T* const out,
+ const int n_batches,
+ const int n_channels,
+ const int n_rows,
+ const int n_cols,
+ int in_batch_stride,
+ int in_channel_stride,
+ int in_row_stride,
+ int out_batch_stride,
+ int out_row_stride,
+ int out_col_stride
+)
+{
+ // Fill in the stride values
+ in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
+ in_channel_stride = (in_channel_stride) ? in_channel_stride
+ : n_rows * in_row_stride;
+ in_batch_stride = (in_batch_stride) ? in_batch_stride
+ : n_channels * in_channel_stride;
+
+ out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
+ out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
+ out_batch_stride = (out_batch_stride) ? out_batch_stride
+ : n_rows * out_row_stride;
+
+ // Perform the re-ordering
+ for (int n = 0; n < n_batches; n++)
+ {
+ const T* const in_batch = in + n*in_batch_stride;
+ T* const out_batch = out + n*out_batch_stride;
+
+ for (int i = 0; i < n_rows; i++)
+ {
+ const T* const in_row = in_batch + i*in_row_stride;
+ T* const out_row = out_batch + i*out_row_stride;
+
+ for (int j = 0; j < n_cols; j++)
+ {
+ const T* const in_col = in_row + j;
+ T* const out_col = out_row + j*out_col_stride;
+
+ for (int c = 0; c < n_channels; c++)
+ {
+ const T* const in_channel = in_col + c*in_channel_stride;
+ out_col[c] = *(in_channel);
+ }
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* 32-bit implementation : NHWC -> NCHW
+ */
+template <>
+inline void nhwc_to_nchw(
+ const int32_t* const in, // Input data in NHWC form
+ int32_t* const out, // Output data in NCHW form
+ const int n_batches,
+ const int n_rows,
+ const int n_cols,
+ const int n_channels,
+ int in_batch_stride,
+ int in_row_stride,
+ int in_col_stride,
+ int out_batch_stride,
+ int out_channel_stride,
+ int out_row_stride
+)
+{
+ typedef int32_t T;
+
+ // Fill in stride values
+ in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
+ in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
+ in_batch_stride = (in_batch_stride) ? in_batch_stride
+ : n_rows * in_row_stride;
+
+ out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
+ out_channel_stride = (out_channel_stride) ? out_channel_stride
+ : n_rows * out_row_stride;
+ out_batch_stride = (out_batch_stride) ? out_batch_stride
+ : n_channels * out_channel_stride;
+
+ // Perform the re-ordering
+ // For every batch
+ for (int n = 0; n < n_batches; n++)
+ {
+ const T* const in_batch = in + n*in_batch_stride;
+ T* const out_batch = out + n*out_batch_stride;
+
+ // For every row
+ for (int i = 0; i < n_rows; i++)
+ {
+ const T* const in_i = in_batch + i*in_row_stride;
+ T* const out_i = out_batch + i*out_row_stride;
+
+ // For every column, beginning with chunks of 4
+ int j = 0, j_remaining = n_cols;
+#ifdef __arm_any__
+ for (; j_remaining >= 4; j += 4, j_remaining -=4)
+ {
+ // For every channel, beginning with chunks of 4
+ int c = 0, c_remaining = n_channels;
+ for (; c_remaining >= 4; c += 4, c_remaining -= 4)
+ {
+ // Read 4 columns worth of 4 channels then zip to produce 4 channels
+ // worth of 4 columns.
+ int32x4_t pixel_channels[4];
+ pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c);
+ pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c);
+ pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c);
+ pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c);
+
+ const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]);
+ const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]);
+ const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]);
+ const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]);
+
+ vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]);
+ vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]);
+ vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]);
+ vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]);
+ }
+ for (; c_remaining; c++, c_remaining--)
+ {
+ for (int _j = 0; _j < 4; _j++)
+ {
+ const T* const in_j = in_i + (j + _j)*in_col_stride;
+ T* const out_j = out_i + (j + _j);
+
+ const T* const in_channel = in_j + c;
+ T* const out_channel = out_j + c*out_channel_stride;
+ *(out_channel) = *(in_channel);
+ }
+ }
+ }
+ for (; j_remaining >= 2; j += 2, j_remaining -=2)
+ {
+ int c = 0, c_remaining = n_channels;
+ for (; c_remaining >= 2; c += 2, c_remaining -= 2)
+ {
+ // Read 2 columns worth of 2 channels then zip to produce 2 channels
+ // worth of 2 columns.
+ int32x2_t pixel_channels[2];
+ pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c);
+ pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c);
+
+ const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]);
+
+ vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]);
+ vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]);
+ }
+ for (; c_remaining; c++, c_remaining--)
+ {
+ for (int _j = 0; _j < 2; _j++)
+ {
+ const T* const in_j = in_i + (j + _j)*in_col_stride;
+ T* const out_j = out_i + (j + _j);
+
+ const T* const in_channel = in_j + c;
+ T* const out_channel = out_j + c*out_channel_stride;
+ *(out_channel) = *(in_channel);
+ }
+ }
+ }
+#endif // __arm_any__
+ for (; j_remaining; j++, j_remaining--)
+ {
+ const T* const in_j = in_i + j*in_col_stride;
+ T* const out_j = out_i + j;
+
+ // For every channel
+ for (int c = 0; c < n_channels; c++)
+ {
+ const T* const in_channel = in_j + c;
+ T* const out_channel = out_j + c*out_channel_stride;
+ *(out_channel) = *(in_channel);
+ }
+ }
+ }
+ }
+}
+
+template <>
+inline void nhwc_to_nchw(
+ const uint32_t* const in, // Input data in NHWC form
+ uint32_t* const out, // Output data in NCHW form
+ const int n_batches,
+ const int n_rows,
+ const int n_cols,
+ const int n_channels,
+ int in_batch_stride,
+ int in_row_stride,
+ int in_col_stride,
+ int out_batch_stride,
+ int out_channel_stride,
+ int out_row_stride
+)
+{
+ // Redirect to generic 32-bit implementation
+ nhwc_to_nchw(
+ reinterpret_cast<const int32_t*>(in),
+ reinterpret_cast<int32_t*>(out),
+ n_batches, n_rows, n_cols, n_channels,
+ in_batch_stride, in_row_stride, in_col_stride,
+ out_batch_stride, out_channel_stride, out_row_stride
+ );
+}
+
+template <>
+inline void nhwc_to_nchw(
+ const float* const in, // Input data in NHWC form
+ float* const out, // Output data in NCHW form
+ const int n_batches,
+ const int n_rows,
+ const int n_cols,
+ const int n_channels,
+ int in_batch_stride,
+ int in_row_stride,
+ int in_col_stride,
+ int out_batch_stride,
+ int out_channel_stride,
+ int out_row_stride
+)
+{
+ // Redirect to generic 32-bit implementation
+ nhwc_to_nchw(
+ reinterpret_cast<const int32_t*>(in),
+ reinterpret_cast<int32_t*>(out),
+ n_batches, n_rows, n_cols, n_channels,
+ in_batch_stride, in_row_stride, in_col_stride,
+ out_batch_stride, out_channel_stride, out_row_stride
+ );
+}
+
+/*****************************************************************************/
+/* Generic implementation : NHWC -> NCHW
+ */
+template <typename T>
+inline void nhwc_to_nchw(
+ const T* const in, // Input data in NHWC form
+ T* const out, // Output data in NCHW form
+ const int n_batches,
+ const int n_rows,
+ const int n_cols,
+ const int n_channels,
+ int in_batch_stride,
+ int in_row_stride,
+ int in_col_stride,
+ int out_batch_stride,
+ int out_channel_stride,
+ int out_row_stride
+)
+{
+ // Fill in stride values
+ in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
+ in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
+ in_batch_stride = (in_batch_stride) ? in_batch_stride
+ : n_rows * in_row_stride;
+
+ out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
+ out_channel_stride = (out_channel_stride) ? out_channel_stride
+ : n_rows * out_row_stride;
+ out_batch_stride = (out_batch_stride) ? out_batch_stride
+ : n_channels * out_channel_stride;
+
+ // Perform the re-ordering
+ // For every batch
+ for (int n = 0; n < n_batches; n++)
+ {
+ const T* const in_batch = in + n*in_batch_stride;
+ T* const out_batch = out + n*out_batch_stride;
+
+ // For every row
+ for (int i = 0; i < n_rows; i++)
+ {
+ const T* const in_i = in_batch + i*in_row_stride;
+ T* const out_i = out_batch + i*out_row_stride;
+
+ // For every column
+ for (int j = 0; j < n_cols; j++)
+ {
+ const T* const in_j = in_i + j*in_col_stride;
+ T* const out_j = out_i + j;
+
+ // For every channel
+ for (int c = 0; c < n_channels; c++)
+ {
+ const T* const in_channel = in_j + c;
+ T* const out_channel = out_j + c*out_channel_stride;
+ *(out_channel) = *(in_channel);
+ }
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* Generic weight re-order implementation.
+ */
+template <typename T>
+inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
+ const T* const in, // Input in [Output x Input x Height x Width] form
+ T* const out, // Output in [Height x Width x Input x Output] form
+ const int n_output_feature_maps,
+ const int n_input_feature_maps,
+ const int n_rows,
+ const int n_cols,
+ int in_output_feature_map_stride,
+ int in_input_feature_map_stride,
+ int in_row_stride,
+ int out_row_stride,
+ int out_col_stride,
+ int out_input_feature_map_stride
+)
+{
+ // Fill in stride values
+ in_row_stride = (in_row_stride)
+ ? in_row_stride
+ : n_cols;
+ in_input_feature_map_stride = (in_input_feature_map_stride)
+ ? in_input_feature_map_stride
+ : n_rows * in_row_stride;
+ in_output_feature_map_stride = (in_output_feature_map_stride)
+ ? in_output_feature_map_stride
+ : n_input_feature_maps * in_input_feature_map_stride;
+
+ out_input_feature_map_stride = (out_input_feature_map_stride)
+ ? out_input_feature_map_stride
+ : n_output_feature_maps;
+ out_col_stride = (out_col_stride)
+ ? out_col_stride
+ : n_input_feature_maps * out_input_feature_map_stride;
+ out_row_stride = (out_row_stride)
+ ? out_row_stride
+ : n_cols * out_col_stride;
+
+ // Perform the re-ordering
+ for (int i = 0; i < n_rows; i++)
+ {
+ const T* const in_row = in + i * in_row_stride;
+ T* out_row = out + i * out_row_stride;
+
+ for (int j = 0; j < n_cols; j++)
+ {
+ const T* const in_col = in_row + j;
+ T* const out_col = out_row + j * out_col_stride;
+
+ for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
+ {
+ const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
+ T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
+
+ for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
+ {
+ const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride;
+ T* const out_ofm = out_ifm + ofm;
+ *(out_ofm) = *(in_ofm);
+ }
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* Generic weight re-order implementation.
+ */
+template <typename T>
+inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
+ const T* const in, // Input in [Height x Width x Input x Output] form
+ T* const out, // Output in [Output x Input x Height x Width] form
+ const int n_rows,
+ const int n_cols,
+ const int n_input_feature_maps,
+ const int n_output_feature_maps,
+ int in_row_stride,
+ int in_col_stride,
+ int in_input_feature_map_stride,
+ int out_output_feature_map_stride,
+ int out_input_feature_map_stride,
+ int out_row_stride
+)
+{
+ // Fill in the stride values
+ in_input_feature_map_stride = (in_input_feature_map_stride)
+ ? in_input_feature_map_stride
+ : n_output_feature_maps;
+ in_col_stride = (in_col_stride)
+ ? in_col_stride
+ : n_input_feature_maps * in_input_feature_map_stride;
+ in_row_stride = (in_row_stride)
+ ? in_row_stride
+ : n_cols * in_col_stride;
+
+ out_row_stride = (out_row_stride)
+ ? out_row_stride
+ : n_cols;
+ out_input_feature_map_stride = (out_input_feature_map_stride)
+ ? out_input_feature_map_stride
+ : n_rows * out_row_stride;
+ out_output_feature_map_stride = (out_output_feature_map_stride)
+ ? out_output_feature_map_stride
+ : n_input_feature_maps * out_input_feature_map_stride;
+
+ // Perform the re-ordering
+ for (int i = 0; i < n_rows; i++)
+ {
+ const T* const in_row = in + i * in_row_stride;
+ T* const out_row = out + i * out_row_stride;
+
+ for (int j = 0; j < n_cols; j++)
+ {
+ const T* const in_col = in_row + j * in_col_stride;
+ T* const out_col = out_row + j;
+
+ for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
+ {
+ const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
+ T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
+
+ for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
+ {
+ const T* const in_ofm = in_ifm + ofm;
+ T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride;
+ *(out_ofm) = *(in_ofm);
+ }
+ }
+ }
+ }
+}
+
+} // namespace reorder
diff --git a/src/core/NEON/kernels/convolution/common/tensor.hpp b/src/core/NEON/kernels/convolution/common/tensor.hpp
new file mode 100644
index 0000000000..7738cdb349
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/tensor.hpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2017-2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+#include <cstdlib>
+#include <random>
+
+#include "alloc.hpp"
+
+enum TensorOrder
+{
+ NHWC, ///< [Batch x Height x Width x Channels]
+ NCHW, ///< [Batch x Channels x Height x Width]
+};
+
+struct Tensor4DShape
+{
+ int n_batches, n_rows, n_cols, n_channels;
+ TensorOrder ordering;
+
+ // Create a new tensor with the default (NHWC) ordering
+ inline Tensor4DShape(
+ const int n_batches,
+ const int n_rows,
+ const int n_cols,
+ const int n_channels,
+ const TensorOrder ordering=NHWC
+ ) : n_batches(n_batches),
+ n_rows(n_rows),
+ n_cols(n_cols),
+ n_channels(n_channels),
+ ordering(ordering)
+ {
+ }
+
+ inline int index(const int n, const int i, const int j, const int c) const
+ {
+ if (this->ordering == NHWC)
+ {
+ return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c;
+ }
+ else // NCHW
+ {
+ return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j;
+ }
+ }
+
+ inline int size() const
+ {
+ return n_batches * n_rows * n_cols * n_channels;
+ }
+
+ inline bool TestEq(const Tensor4DShape& other) const
+ {
+ return (n_batches == other.n_batches &&
+ n_rows == other.n_rows &&
+ n_cols == other.n_cols &&
+ n_channels == other.n_channels);
+ }
+};
+
+
+enum WeightOrder
+{
+ HWIO, ///< [Height x Width x Input channels x Output channels]
+ OIHW, ///< [Output channels x Input channels x Height x Width]
+};
+
+struct KernelShape
+{
+ int n_output_channels, n_rows, n_cols, n_input_channels;
+ WeightOrder ordering;
+
+ inline KernelShape(
+ const int n_output_channels,
+ const int n_rows,
+ const int n_cols,
+ const int n_input_channels,
+ const WeightOrder ordering=HWIO
+ ) : n_output_channels(n_output_channels),
+ n_rows(n_rows),
+ n_cols(n_cols),
+ n_input_channels(n_input_channels),
+ ordering(ordering)
+ {
+ }
+
+ inline int index(int oc, int i, int j, int ic) const
+ {
+ if (this->ordering == HWIO)
+ {
+ return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc;
+ }
+ else // OIHW
+ {
+ return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j;
+ }
+ }
+
+ inline int size(void) const
+ {
+ return n_output_channels * n_rows * n_cols * n_input_channels;
+ }
+};
+
+
+template <typename ShapeT, typename T>
+class Tensor4D final
+{
+ public:
+ Tensor4D(ShapeT shape) :
+ shape(shape),
+ _data(reinterpret_cast<T*>(ALLOCATE(size_bytes())))
+ {
+ Clear();
+ }
+
+ Tensor4D(const Tensor4D<ShapeT, T>&) = delete;
+ Tensor4D operator=(const Tensor4D<ShapeT, T>&) = delete;
+
+ ~Tensor4D() {
+ free(_data);
+ }
+
+ inline T* ptr() const {
+ return _data;
+ }
+
+ inline size_t size_bytes() const {
+ return shape.size() * sizeof(T);
+ }
+
+ /* Extract an element of the tensor.
+ *
+ * If the shape is a Tensor4DShape then the index is given as batch, row,
+ * column and channel. If the shape is a KernelShape then the index is
+ * given as output channel, row, column and input channel.
+ */
+ inline T& element(const int a, const int b, const int c, const int d) const
+ {
+ return _data[shape.index(a, b, c, d)];
+ }
+
+ inline void Clear() {
+ Fill(static_cast<T>(0));
+ }
+
+ inline void Fill(T val) {
+ for (int i = 0; i < shape.size(); i++)
+ _data[i] = val;
+ }
+
+ const ShapeT shape;
+
+ private:
+ T* const _data;
+};
diff --git a/src/core/NEON/kernels/convolution/common/tensor_utils.hpp b/src/core/NEON/kernels/convolution/common/tensor_utils.hpp
new file mode 100644
index 0000000000..82619f4799
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/tensor_utils.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+#include "tensor.hpp"
+
+// Methods to print tensors and weights
+void PrintTensor(const Tensor4D<Tensor4DShape, float>& tensor);
+void PrintWeights(const Tensor4D<KernelShape, float>& weights);
+
+// Test the equivalence of two tensors
+// Counts the instances that |a - b|/|a| > max_err
+bool CmpTensors(
+ const Tensor4D<Tensor4DShape, float>& a,
+ const Tensor4D<Tensor4DShape, float>& b,
+ const float max_err=0.0f
+);
+
+// Fill the tensor with a test pattern
+void TestPattern(Tensor4D<Tensor4DShape, float>& tensor);
+void TestPattern(Tensor4D<KernelShape, float>& weights);
+
+// Fill the tensor with random values
+void Randomise(Tensor4D<Tensor4DShape, float>& tensor, const int seed=0);
+void Randomise(Tensor4D<KernelShape, float>& weights, const int seed=0);
diff --git a/src/core/NEON/kernels/convolution/common/utils.hpp b/src/core/NEON/kernels/convolution/common/utils.hpp
new file mode 100644
index 0000000000..b7a9517c65
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/utils.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017-2018 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include <limits>
+
+void PrintMatrix(const float *const m, const int M, const int N, const int row_stride);
+
+constexpr inline int iceildiv(const int a, const int b)
+{
+ return (a + b - 1) / b;
+}
+
+template <typename T>
+inline T roundup(const T a, const T b)
+{
+ return b * iceildiv(a, b);
+}
+
+template<typename T>
+struct TypeBounds
+{
+ static constexpr T lower() noexcept { return std::numeric_limits<T>::has_infinity
+ ? -std::numeric_limits<T>::infinity()
+ : std::numeric_limits<T>::lowest(); };
+ static constexpr T upper() noexcept { return std::numeric_limits<T>::has_infinity
+ ? std::numeric_limits<T>::infinity()
+ : std::numeric_limits<T>::max(); };
+};
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+template<>
+struct TypeBounds<__fp16>
+{
+ static constexpr __fp16 lower() noexcept { return -std::numeric_limits<float>::infinity(); };
+ static constexpr __fp16 upper() noexcept { return std::numeric_limits<float>::infinity(); }
+};
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp
new file mode 100644
index 0000000000..70d6689731
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2018-2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include <arm_neon.h>
+#include "activation.hpp"
+#include "padding.hpp"
+
+namespace depthwise
+{
+
+namespace nck = neon_convolution_kernels;
+
+class IDepthwiseConvolution
+{
+ public:
+ virtual ~IDepthwiseConvolution() = default;
+
+ virtual int output_size(
+ int dim_size,
+ unsigned int padding_before,
+ unsigned int padding_after
+ ) const = 0;
+
+ /* Set input tensor and stride. */
+ virtual void set_input(const void *inptr) = 0;
+ virtual void set_input(const void *inptr, int column_stride) = 0;
+ virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0;
+ virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0;
+
+ /* Set output tensor and stride. */
+ virtual void set_output(void *outptr) = 0;
+ virtual void set_output(void *outptr, int column_stride) = 0;
+ virtual void set_output(void *outptr, int row_stride, int column_stride) = 0;
+ virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0;
+
+ /* Weights and biases are re-ordered to improve memory access patterns. Use
+ * these methods to determine the size of the re-pack buffer and to set the
+ * address (and implicitly reorder the weights and biases into) the buffer.
+ */
+ virtual size_t get_packed_params_size(void) const = 0;
+ virtual void set_packed_params_buffer(void *) = 0;
+
+ virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0;
+ virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0;
+ virtual void pack_params(
+ void *buffer,
+ const void* weights,
+ unsigned int weight_row_stride,
+ unsigned int weight_col_stride,
+ const void *biases=nullptr
+ ) const = 0;
+
+ /* Working space is used to pad tensors on the fly. Before running any
+ * inference check the amount of space required, allocate and provide a
+ * pointer to the convolution engine.
+ */
+ virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0;
+ virtual void set_working_space(void *) = 0;
+
+ virtual unsigned int get_window(void) const = 0;
+ virtual void run(
+ unsigned int start,
+ unsigned int stop,
+ unsigned int threadid=0
+ ) = 0;
+};
+
+template <
+ unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols,
+ typename TIn, typename TBias, typename TOut,
+ typename Derived
+>
+class DepthwiseConvolutionBase : public IDepthwiseConvolution
+{
+ public:
+ // Information about the specific convolution instance
+ using InputType = TIn;
+ using BiasType = TBias;
+ using OutputType = TOut;
+ static constexpr int output_tile_rows = OutputTileRows;
+ static constexpr int output_tile_cols = OutputTileCols;
+ static constexpr int kernel_rows = KernelRows;
+ static constexpr int kernel_cols = KernelCols;
+ static constexpr int stride_rows = StrideRows;
+ static constexpr int stride_cols = StrideCols;
+ static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows;
+ static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols;
+
+ /** Create a new depthwise convolution engine.
+ *
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ */
+ DepthwiseConvolutionBase(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ /** Create a new depthwise convolution engine.
+ *
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ */
+ DepthwiseConvolutionBase(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ // Cannot copy or move a DepthwiseConvolution.
+ DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete;
+ DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete;
+
+ /* Set input tensor and stride. */
+ void set_input(const void *inptr) override;
+ void set_input(const void *inptr, int column_stride) override;
+ void set_input(const void *inptr, int row_stride, int column_stride) override;
+ void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
+
+ /* Set output tensor and stride. */
+ void set_output(void *outptr) override;
+ void set_output(void *outptr, int column_stride) override;
+ void set_output(void *outptr, int row_stride, int column_stride) override;
+ void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
+
+ /** Get the number of output rows/columns.
+ *
+ * @param[in] dim_size Number of elements in the dimension (rows/columns)
+ * @param[in] same_padding True if the padding is SAME, otherwise false.
+ */
+ static int get_output_size(
+ int dim_size, unsigned int padding_before, unsigned int padding_after
+ );
+
+ int output_size(
+ int dim_size, unsigned int padding_before, unsigned int padding_after
+ ) const override;
+
+ /* Determine how much memory is required to store the packed weights and
+ * biases.
+ */
+ size_t get_packed_params_size(void) const override;
+
+ /* Set the buffer for the packed weights and biases, and perform the
+ * packing.
+ */
+ void set_packed_params_buffer(void *buffer) override;
+
+ void pack_params(const void *weights, const void *biases=nullptr) const override;
+
+ void pack_params(
+ void *buffer,
+ const void *weights,
+ const void *biases=nullptr
+ ) const override;
+
+ void pack_params(
+ void *buffer,
+ const void *weights,
+ unsigned int weight_row_stride,
+ unsigned int weight_col_stride,
+ const void *biases=nullptr
+ ) const override;
+
+ /** Query the amount of working space required.
+ * @param[in] The largest number of threads which will be used to execute
+ * the kernel.
+ */
+ size_t get_working_space_size(unsigned int n_threads=1) const override;
+
+ /** Set the working space buffer.
+ */
+ void set_working_space(void *buffer) override;
+
+ /** Get the window of work to be performed by an instance of the operator.
+ */
+ unsigned int get_window(void) const override;
+
+ /** Perform a portion of the work associated with the operator.
+ *
+ * Will perform the window of work described by $[start, stop)$.
+ *
+ * @param[in] start Start of the window of work to perform.
+ * @param[in] stop End of the work to perform.
+ * @param[in] ID of the thread performing the work.
+ */
+ void run(
+ unsigned int start,
+ unsigned int stop,
+ unsigned int threadid=0
+ ) override;
+
+ protected:
+ /** Get the value to use to pad the tensor.
+ */
+ TIn _input_padding_value(void) const;
+
+ /** Implementation of the parameter packing.
+ */
+ void _pack_params(
+ void *buffer,
+ const void *weights,
+ unsigned int weight_row_stride,
+ unsigned int weight_col_stride,
+ const void *biases=nullptr
+ ) const;
+
+ /** Process a tile-row of the tensors.
+ */
+ void process_tile_row(
+ unsigned int threadid,
+ int n_channels,
+ const void* packed_params,
+ const InputType* inptr,
+ OutputType* outptr,
+ int row_pad_in_top,
+ int row_pad_in_left,
+ int row_pad_in_bottom,
+ int row_pad_out_bottom,
+ int n_tiles,
+ int n_input_cols,
+ int n_output_cols
+ );
+
+ /** Process a single tile of the tensor.
+ *
+ * This method will apply input/output padding (if required) and call the
+ * depthwise tile implementation.
+ */
+ void process_tile(
+ unsigned int threadid,
+ int n_channels,
+ const void* packed_params,
+ const InputType* inptr,
+ OutputType* outptr,
+ int pad_in_top,
+ int pad_in_left,
+ int pad_in_bottom,
+ int pad_in_right,
+ int pad_out_bottom,
+ int pad_out_right
+ );
+
+ /** Perform depthwise convolution on a single tile.
+ */
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const InputType* inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ OutputType* outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride
+ );
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const InputType* inptrs[inner_tile_rows][inner_tile_cols],
+ OutputType* outptrs[output_tile_rows][output_tile_cols]
+ );
+
+ int n_channels(void) const;
+
+ private:
+ // Member variables of instances of a convolution engine.
+ const InputType* _input;
+ OutputType* _output;
+ void* _packed_parameters;
+ void* _working_space; // Per-thread working space
+ const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
+ _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
+ const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right;
+ const nck::ActivationFunction _activation;
+
+ // Stride information for a convolution instance
+ int _input_col_stride, _input_row_stride, _input_batch_stride;
+ int _output_col_stride, _output_row_stride, _output_batch_stride;
+
+ // Methods for getting access to working space
+ size_t _get_input_working_space_size(void) const;
+ size_t _get_output_working_space_size(void) const;
+
+ void *_get_input_working_space(unsigned int threadid) const;
+ void *_get_output_working_space(unsigned int threadid) const;
+};
+
+
+template <
+ unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols,
+ typename TIn, typename TBias, typename TOut
+>
+class DepthwiseConvolution : public DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ TIn, TBias, TOut,
+ DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ TIn, TBias, TOut
+ >
+>
+{
+ using Base = DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ TIn, TBias, TOut,
+ DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ TIn, TBias, TOut
+ > >;
+ friend Base;
+ using InputType = typename Base::InputType;
+ using OutputType = typename Base::OutputType;
+
+ public:
+ using Base::DepthwiseConvolutionBase;
+
+ protected:
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const TIn* inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ TOut* outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride
+ );
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
+};
+
+
+template <
+ unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols
+>
+class DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float, float, float
+> : public DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float, float, float,
+ DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float, float, float
+ >
+>
+{
+ using Base = DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float, float, float,
+ DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float, float, float
+ > >;
+ friend Base;
+ using InputType = typename Base::InputType;
+ using OutputType = typename Base::OutputType;
+
+ public:
+ DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ protected:
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const float* inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ float* outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride
+ );
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ float* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
+};
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+template <
+ unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols
+>
+class DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float16_t, float16_t, float16_t
+> : public DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float16_t, float16_t, float16_t,
+ DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float16_t, float16_t, float16_t
+ >
+>
+{
+ using Base = DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float16_t, float16_t, float16_t,
+ DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ float16_t, float16_t, float16_t
+ > >;
+ friend Base;
+ using InputType = typename Base::InputType;
+ using OutputType = typename Base::OutputType;
+
+ public:
+ DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ protected:
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const float16_t* inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ float16_t* outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride
+ );
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
+};
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+} // namespace depthwise
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp
new file mode 100644
index 0000000000..1bae815613
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include <deque>
+#include <functional>
+#include <memory>
+
+#include "depthwise.hpp"
+
+namespace depthwise
+{
+
+template <
+ unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols,
+ typename TIn, typename TBias, typename TOut
+>
+class DilatedDepthwiseConvolution : public IDepthwiseConvolution
+{
+ public:
+ /** Create a new dilated depthwise convolution engine.
+ */
+ DilatedDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int dilation_factor,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ /** Create a new dilated depthwise convolution engine.
+ */
+ DilatedDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int dilation_factor, int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ // Cannot copy or move a DilatedDepthwiseConvolution.
+ DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete;
+ DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete;
+
+ /* Set input tensor and stride. */
+ void set_input(const void *inptr) override;
+ void set_input(const void *inptr, int column_stride) override;
+ void set_input(const void *inptr, int row_stride, int column_stride) override;
+ void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
+
+ /* Set output tensor and stride. */
+ void set_output(void *outptr) override;
+ void set_output(void *outptr, int column_stride) override;
+ void set_output(void *outptr, int row_stride, int column_stride) override;
+ void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
+
+ static int get_output_size(
+ int dim_size,
+ unsigned int padding_before,
+ unsigned int padding_after,
+ int dilation_factor
+ );
+
+ int output_size(
+ int dim_size, unsigned int padding_before, unsigned int padding_after
+ ) const override;
+
+ /* Weights and biases are re-ordered to improve memory access patterns. Use
+ * these methods to determine the size of the re-pack buffer and to set the
+ * address (and implicitly reorder the weights and biases into) the buffer.
+ */
+ size_t get_packed_params_size(void) const override;
+ void set_packed_params_buffer(void *) override;
+
+ void pack_params(const void *weights, const void *biases=nullptr) const override;
+ void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override;
+ void pack_params(
+ void *buffer,
+ const void* weights,
+ unsigned int weight_row_stride,
+ unsigned int weight_col_stride,
+ const void *biases=nullptr
+ ) const override;
+
+ /* Working space is used to pad tensors on the fly. Before running any
+ * inference check the amount of space required, allocate and provide a
+ * pointer to the convolution engine.
+ */
+ size_t get_working_space_size(unsigned int nthreads=1) const override;
+ void set_working_space(void *) override;
+
+ unsigned int get_window(void) const override;
+ void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
+
+ protected:
+ /** Protected constructor which also accepts a function to construct a new
+ * subconvolution
+ */
+ DilatedDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int dilation_factor, int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right,
+ std::function<IDepthwiseConvolution *(int, int, int, int, int, int, nck::ActivationFunction, unsigned int, unsigned int, unsigned int, unsigned int)> subconvfn
+ );
+
+ const int _dilation_factor;
+ const int _n_input_rows, _n_input_cols, _n_channels;
+ const int _padding_top, _padding_left;
+ const int _n_output_rows, _n_output_cols;
+
+ /* Dilated depthwise convolution is performed through repeated calls to
+ * non-dilated convolutions. If the dilation factor is $n$, then we perform
+ * $(n + 1)^2$ depthwise convolutions.
+ */
+ using BaseDepthwise = DepthwiseConvolution<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ TIn, TBias, TOut
+ >;
+ std::deque<std::deque<std::unique_ptr<IDepthwiseConvolution>>> _convs;
+};
+
+} // namespace depthwise
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp
new file mode 100644
index 0000000000..4343f6ad45
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2018-2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+#include "depthwise.hpp"
+#include "qasymm8.hpp"
+#include "qsymm8.hpp"
+#pragma once
+
+using namespace neon_convolution_kernels;
+using namespace qasymm8;
+
+inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b)
+{
+ return vqrdmulhq_s32(a, b);
+}
+
+inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b)
+{
+ return vqrdmulhq_n_s32(a, b);
+}
+
+inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b)
+{
+ return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0);
+}
+
+inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift)
+{
+ const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31);
+ const int32x4_t fixed = vqaddq_s32(x, fixup);
+ return vrshlq_s32(fixed, shift);
+}
+
+inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent)
+{
+ const int32x4_t shift = vdupq_n_s32(-exponent);
+ const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31);
+ const int32x4_t fixed = vqaddq_s32(x, fixup);
+ return vrshlq_s32(fixed, shift);
+}
+
+inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent)
+{
+ const int32x2_t shift = vdup_n_s32(-exponent);
+ const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31);
+ const int32x2_t fixed = vqadd_s32(x, fixup);
+ return vrshl_s32(fixed, shift);
+}
+
+inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent)
+{
+ const int32x2_t xs = vdup_n_s32(x);
+ return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0);
+}
+
+namespace depthwise
+{
+
+namespace nck = neon_convolution_kernels;
+
+template <
+ unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols
+>
+class QAsymm8DepthwiseConvolution : public DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ uint8_t, int32_t, uint8_t,
+ QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
+>
+{
+ using Base = DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ uint8_t, int32_t, uint8_t,
+ QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
+ >;
+ friend Base;
+ using InputType = typename Base::InputType;
+ using OutputType = typename Base::OutputType;
+
+ public:
+ QAsymm8DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params& weight_quantisation,
+ const qasymm8::QAsymm8Params& input_quantisation,
+ const qasymm8::QAsymm8Params& output_quantisation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ QAsymm8DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params& weight_quantisation,
+ const qasymm8::QAsymm8Params& input_quantisation,
+ const qasymm8::QAsymm8Params& output_quantisation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ QAsymm8DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params& weight_quantisation,
+ const qasymm8::QAsymm8Params& input_quantisation,
+ const qasymm8::QAsymm8Params& output_quantisation,
+ const qasymm8::QAsymm8RescaleParams& rescale_parameters,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ QAsymm8DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params& weight_quantisation,
+ const qasymm8::QAsymm8Params& input_quantisation,
+ const qasymm8::QAsymm8Params& output_quantisation,
+ const qasymm8::QAsymm8RescaleParams& rescale_parameters,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ protected:
+ uint8_t _input_padding_value(void) const;
+
+ void _pack_params(
+ void *buffer,
+ const void *weights,
+ unsigned int weight_row_stride,
+ unsigned int weight_col_stride,
+ const void *biases=nullptr
+ ) const;
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const uint8_t* inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ uint8_t* outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride
+ );
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
+
+ private:
+ // Quantization parameters
+ const qasymm8::QAsymm8Params _weights_quant, _inputs_quant, _output_quant;
+ const qasymm8::QAsymm8RescaleParams rescale_parameters;
+};
+
+template <
+ unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols
+>
+class QSymm8HybridPerChannelDepthwiseConvolution : public DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ uint8_t, int32_t, uint8_t,
+ QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
+>
+{
+ using Base = DepthwiseConvolutionBase<
+ OutputTileRows, OutputTileCols,
+ KernelRows, KernelCols,
+ StrideRows, StrideCols,
+ uint8_t, int32_t, uint8_t,
+ QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
+ >;
+ friend Base;
+ using InputType = typename Base::InputType;
+ using OutputType = typename Base::OutputType;
+
+ public:
+ QSymm8HybridPerChannelDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ nck::ActivationFunction activation,
+ const qsymm8::QSymm8PerChannelParams& weight_quantisation,
+ const qasymm8::QAsymm8Params& input_quantisation,
+ const qasymm8::QAsymm8Params& output_quantisation,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ QSymm8HybridPerChannelDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ nck::ActivationFunction activation,
+ const qsymm8::QSymm8PerChannelParams& weight_quantisation,
+ const qasymm8::QAsymm8Params& input_quantisation,
+ const qasymm8::QAsymm8Params& output_quantisation,
+ const qsymm8::QSymm8PerChannelRescaleParams& rescale_parameters,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right
+ );
+
+ size_t get_packed_params_size(void) const override
+ {
+ return this->n_channels() * (sizeof(int8_t)*KernelRows*KernelCols + 3*sizeof(int32_t));
+
+ }
+
+ protected:
+ uint8_t _input_padding_value(void) const;
+
+ void _pack_params(
+ void *buffer,
+ const void *weights,
+ unsigned int weight_row_stride,
+ unsigned int weight_col_stride,
+ const void *biases=nullptr
+ ) const;
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const uint8_t* inptr,
+ unsigned int in_row_stride,
+ unsigned int in_col_stride,
+ uint8_t* outptr,
+ unsigned int out_row_stride,
+ unsigned int out_col_stride
+ );
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
+
+ private:
+ // Quantization parameters
+ const qsymm8::QSymm8PerChannelParams _weights_quant;
+ const qasymm8::QAsymm8Params _input_quant, _output_quant;
+ const qsymm8::QSymm8PerChannelRescaleParams _rescale_parameters;
+};
+
+} // namespace depthwise
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp
new file mode 100644
index 0000000000..a11b0981c9
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+#include "depthwise_dilated.hpp"
+#include "depthwise_quantized.hpp"
+
+namespace depthwise {
+
+template <unsigned int OutputTileRows, unsigned int OutputTileCols,
+ unsigned int KernelRows, unsigned int KernelCols,
+ unsigned int StrideRows, unsigned int StrideCols>
+class QAsymm8DilatedDepthwiseConvolution
+ : public DilatedDepthwiseConvolution<
+ OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows,
+ StrideCols, uint8_t, int32_t, uint8_t> {
+public:
+ /** Create a new dilated depthwise convolution engine.
+ */
+ QAsymm8DilatedDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int dilation_factor, nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params &weight_quantisation,
+ const qasymm8::QAsymm8Params &input_quantisation,
+ const qasymm8::QAsymm8Params &output_quantisation,
+ unsigned int padding_top, unsigned int padding_left,
+ unsigned int padding_bottom, unsigned int padding_right);
+
+ /** Create a new dilated depthwise convolution engine.
+ */
+ QAsymm8DilatedDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int dilation_factor, int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params &weight_quantisation,
+ const qasymm8::QAsymm8Params &input_quantisation,
+ const qasymm8::QAsymm8Params &output_quantisation,
+ unsigned int padding_top, unsigned int padding_left,
+ unsigned int padding_bottom, unsigned int padding_right);
+
+ /** Create a new dilated depthwise convolution engine.
+ */
+ QAsymm8DilatedDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int dilation_factor, nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params &weight_quantisation,
+ const qasymm8::QAsymm8Params &input_quantisation,
+ const qasymm8::QAsymm8Params &output_quantisation,
+ const qasymm8::QAsymm8RescaleParams &rescale_parameters,
+ unsigned int padding_top, unsigned int padding_left,
+ unsigned int padding_bottom, unsigned int padding_right);
+
+ /** Create a new dilated depthwise convolution engine.
+ */
+ QAsymm8DilatedDepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols, int n_channels,
+ int dilation_factor, int n_output_rows, int n_output_cols,
+ nck::ActivationFunction activation,
+ const qasymm8::QAsymm8Params &weight_quantisation,
+ const qasymm8::QAsymm8Params &input_quantisation,
+ const qasymm8::QAsymm8Params &output_quantisation,
+ const qasymm8::QAsymm8RescaleParams& rescale_parameters,
+ unsigned int padding_top, unsigned int padding_left,
+ unsigned int padding_bottom, unsigned int padding_right);
+};
+
+} // namespace depthwise
diff --git a/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
index d7ee70a1cd..59f5c6c6b3 100644
--- a/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
+++ b/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
@@ -25,10 +25,10 @@
#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H
#define ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/utils/misc/Requires.h"
+#include "src/core/AccessWindowStatic.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "support/Requires.h"
#include <arm_neon.h>
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index c1a1c2ecf0..414c128a27 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/Utils.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
diff --git a/src/core/helpers/AutoConfiguration.h b/src/core/helpers/AutoConfiguration.h
new file mode 100644
index 0000000000..6880a6cb66
--- /dev/null
+++ b/src/core/helpers/AutoConfiguration.h
@@ -0,0 +1,176 @@
+/*
+* Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_AUTOCONFIGURATION_H
+#define SRC_CORE_HELPERS_AUTOCONFIGURATION_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
+ *
+ * @param[in,out] info Tensor info used to check and assign.
+ * @param[in] shape New shape.
+ * @param[in] num_channels New number of channels.
+ * @param[in] data_type New data type
+ * @param[in] quantization_info (Optional) New quantization info
+ *
+ * @return True if the tensor info has been initialized
+ */
+inline bool auto_init_if_empty(ITensorInfo &info,
+ const TensorShape &shape,
+ int num_channels, DataType data_type,
+ QuantizationInfo quantization_info = QuantizationInfo())
+{
+ if(info.tensor_shape().total_size() == 0)
+ {
+ info.set_data_type(data_type);
+ info.set_num_channels(num_channels);
+ info.set_tensor_shape(shape);
+ info.set_quantization_info(quantization_info);
+ return true;
+ }
+
+ return false;
+}
+
+/** Auto initialize the tensor info using another tensor info.
+*
+* @param info_sink Tensor info used to check and assign
+* @param info_source Tensor info used to assign
+*
+* @return True if the tensor info has been initialized
+*/
+inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source)
+{
+ if(info_sink.tensor_shape().total_size() == 0)
+ {
+ info_sink.set_data_type(info_source.data_type());
+ info_sink.set_num_channels(info_source.num_channels());
+ info_sink.set_tensor_shape(info_source.tensor_shape());
+ info_sink.set_quantization_info(info_source.quantization_info());
+ info_sink.set_data_layout(info_source.data_layout());
+ return true;
+ }
+
+ return false;
+}
+
+/** Set the shape to the specified value if the current assignment is empty.
+ *
+ * @param[in,out] info Tensor info used to check and assign.
+ * @param[in] shape New shape.
+ *
+ * @return True if the shape has been changed.
+ */
+inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
+{
+ if(info.tensor_shape().total_size() == 0)
+ {
+ info.set_tensor_shape(shape);
+ return true;
+ }
+
+ return false;
+}
+
+/** Set the format, data type and number of channels to the specified value if
+ * the current data type is unknown.
+ *
+ * @param[in,out] info Tensor info used to check and assign.
+ * @param[in] format New format.
+ *
+ * @return True if the format has been changed.
+ */
+inline bool set_format_if_unknown(ITensorInfo &info, Format format)
+{
+ if(info.data_type() == DataType::UNKNOWN)
+ {
+ info.set_format(format);
+ return true;
+ }
+
+ return false;
+}
+
+/** Set the data type and number of channels to the specified value if
+ * the current data type is unknown.
+ *
+ * @param[in,out] info Tensor info used to check and assign.
+ * @param[in] data_type New data type.
+ *
+ * @return True if the data type has been changed.
+ */
+inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
+{
+ if(info.data_type() == DataType::UNKNOWN)
+ {
+ info.set_data_type(data_type);
+ return true;
+ }
+
+ return false;
+}
+
+/** Set the data layout to the specified value if
+ * the current data layout is unknown.
+ *
+ * @param[in,out] info Tensor info used to check and assign.
+ * @param[in] data_layout New data layout.
+ *
+ * @return True if the data type has been changed.
+ */
+inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout)
+{
+ if(info.data_layout() == DataLayout::UNKNOWN)
+ {
+ info.set_data_layout(data_layout);
+ return true;
+ }
+
+ return false;
+}
+
+/** Set the quantization info to the specified value if
+ * the current quantization info is empty and the data type of asymmetric quantized type
+ *
+ * @param[in,out] info Tensor info used to check and assign.
+ * @param[in] quantization_info Quantization info
+ *
+ * @return True if the quantization info has been changed.
+ */
+inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info)
+{
+ if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type())))
+ {
+ info.set_quantization_info(quantization_info);
+ return true;
+ }
+
+ return false;
+}
+} // namespace arm_compute
+
+#endif /* SRC_CORE_HELPERS_AUTOCONFIGURATION_H */
diff --git a/src/core/helpers/NormalizationHelpers.h b/src/core/helpers/NormalizationHelpers.h
new file mode 100644
index 0000000000..d94d5e3602
--- /dev/null
+++ b/src/core/helpers/NormalizationHelpers.h
@@ -0,0 +1,47 @@
+/*
+* Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H
+#define SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+/** Calculate the normalization dimension index for a given normalization type
+ *
+ * @param[in] layout Data layout of the input and output tensor
+ * @param[in] info Normalization info
+ *
+ * @return Normalization dimension index
+ */
+inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info)
+{
+ const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH);
+ const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL);
+
+ return info.is_in_map() ? width_idx : channel_idx;
+}
+} // namespace arm_compute
+#endif /* SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H */
diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h
new file mode 100644
index 0000000000..827bbef4cd
--- /dev/null
+++ b/src/core/helpers/ScaleHelpers.h
@@ -0,0 +1,331 @@
+/*
+* Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_SCALEHELPERS_H
+#define SRC_CORE_HELPERS_SCALEHELPERS_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/QuantizationInfo.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace scale_helpers
+{
+/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between
+ * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
+ *
+ * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
+ *
+ * @note dx and dy must be in the range [0, 1.0]
+ *
+ * @return The bilinear interpolated pixel value
+ */
+template <typename T>
+inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy)
+{
+ ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
+
+ const float dx1 = 1.0f - dx;
+ const float dy1 = 1.0f - dy;
+
+ const T a00 = *pixel_ptr;
+ const T a01 = *(pixel_ptr + 1);
+ const T a10 = *(pixel_ptr + stride);
+ const T a11 = *(pixel_ptr + stride + 1);
+
+ const float w1 = dx1 * dy1;
+ const float w2 = dx * dy1;
+ const float w3 = dx1 * dy;
+ const float w4 = dx * dy;
+
+ return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
+}
+
+/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
+ * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format.
+ *
+ * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
+ * @param[in] iq_info Input QuantizationInfo
+ * @param[in] oq_info Output QuantizationInfo
+ *
+ * @note dx and dy must be in the range [0, 1.0]
+ *
+ * @return The bilinear interpolated pixel value
+ */
+inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy,
+ UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
+{
+ ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
+
+ const float dx1 = 1.0f - dx;
+ const float dy1 = 1.0f - dy;
+
+ const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info);
+ const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info);
+ const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info);
+ const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info);
+
+ const float w1 = dx1 * dy1;
+ const float w2 = dx * dy1;
+ const float w3 = dx1 * dy;
+ const float w4 = dx * dy;
+ float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
+ return static_cast<uint8_t>(quantize_qasymm8(res, oq_info));
+}
+
+/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
+ * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8_SIGNED and in single channel format.
+ *
+ * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
+ * @param[in] iq_info Input QuantizationInfo
+ * @param[in] oq_info Output QuantizationInfo
+ *
+ * @note dx and dy must be in the range [0, 1.0]
+ *
+ * @return The bilinear interpolated pixel value
+ */
+inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy,
+ UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
+{
+ ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
+
+ const float dx1 = 1.0f - dx;
+ const float dy1 = 1.0f - dy;
+
+ const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info);
+ const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info);
+ const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info);
+ const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info);
+
+ const float w1 = dx1 * dy1;
+ const float w2 = dx * dy1;
+ const float w3 = dx1 * dy;
+ const float w4 = dx * dy;
+ float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
+ return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info));
+}
+
+/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between
+ * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
+ *
+ * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input.
+ * @param[in] stride Stride to access the bottom pixel value
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
+ *
+ * @note dy must be in the range [0, 1.0]
+ *
+ * @return The linear interpolated pixel value
+ */
+template <typename T>
+inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy)
+{
+ ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
+
+ const float dy1 = 1.0f - dy;
+
+ const T a00 = *pixel_ptr;
+ const T a10 = *(pixel_ptr + stride);
+
+ const float w1 = dy1;
+ const float w3 = dy;
+
+ return static_cast<T>(a00 * w1 + a10 * w3);
+}
+
+/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between
+ * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
+ *
+ * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input.
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
+ *
+ * @note dx must be in the range [0, 1.0]
+ *
+ * @return The linear interpolated pixel value
+ */
+template <typename T>
+inline T delta_linear_c1_x(const T *pixel_ptr, float dx)
+{
+ ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
+
+ const T a00 = *pixel_ptr;
+ const T a01 = *(pixel_ptr + 1);
+
+ const float dx1 = 1.0f - dx;
+
+ const float w1 = dx1;
+ const float w2 = dx;
+
+ return static_cast<T>(a00 * w1 + a01 * w2);
+}
+
+/** Return the pixel at (x,y) using bilinear interpolation.
+ *
+ * @warning Only works if the iterator was created with an IImage
+ *
+ * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input.
+ * @param[in] stride Stride in bytes of the image;
+ * @param[in] x X position of the wanted pixel
+ * @param[in] y Y position of the wanted pixel
+ *
+ * @return The pixel at (x, y) using bilinear interpolation.
+ */
+template <typename T>
+inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y)
+{
+ ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
+
+ const int32_t xi = std::floor(x);
+ const int32_t yi = std::floor(y);
+
+ const float dx = x - xi;
+ const float dy = y - yi;
+
+ return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy);
+}
+
+/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input
+ *
+ * @warning Only works if the iterator was created with an IImage
+ *
+ * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image.
+ * @param[in] stride Stride in bytes of the image
+ * @param[in] width Width of the image
+ * @param[in] height Height of the image
+ * @param[in] x X position of the wanted pixel
+ * @param[in] y Y position of the wanted pixel
+ *
+ * @return The pixel at (x, y) using bilinear interpolation.
+ */
+template <typename T>
+inline uint8_t
+pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y)
+{
+ ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
+
+ x = std::max(-1.f, std::min(x, static_cast<float>(width)));
+ y = std::max(-1.f, std::min(y, static_cast<float>(height)));
+
+ const float xi = std::floor(x);
+ const float yi = std::floor(y);
+
+ const float dx = x - xi;
+ const float dy = y - yi;
+
+ if(dx == 0.0f)
+ {
+ if(dy == 0.0f)
+ {
+ return static_cast<T>(first_pixel_ptr[static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride]);
+ }
+ return delta_linear_c1_y(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride,
+ stride, dy);
+ }
+ if(dy == 0.0f)
+ {
+ return delta_linear_c1_x(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride,
+ dx);
+ }
+ return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride,
+ dx, dy);
+}
+
+/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8
+ *
+ * @note The interpolation area depends on the width and height ration of the input and output images
+ * @note Currently average of the contributing pixels is calculated
+ *
+ * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image.
+ * @param[in] stride Stride in bytes of the image
+ * @param[in] width Width of the image
+ * @param[in] height Height of the image
+ * @param[in] wr Width ratio among the input image width and output image width.
+ * @param[in] hr Height ratio among the input image height and output image height.
+ * @param[in] x X position of the wanted pixel
+ * @param[in] y Y position of the wanted pixel
+ *
+ * @return The pixel at (x, y) using area interpolation.
+ */
+inline uint8_t
+pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr,
+ float hr, int x, int y)
+{
+ ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
+
+ // Calculate sampling position
+ float in_x = (x + 0.5f) * wr - 0.5f;
+ float in_y = (y + 0.5f) * hr - 0.5f;
+
+ // Get bounding box offsets
+ int x_from = std::floor(x * wr - 0.5f - in_x);
+ int y_from = std::floor(y * hr - 0.5f - in_y);
+ int x_to = std::ceil((x + 1) * wr - 0.5f - in_x);
+ int y_to = std::ceil((y + 1) * hr - 0.5f - in_y);
+
+ // Clamp position to borders
+ in_x = std::max(-1.f, std::min(in_x, static_cast<float>(width)));
+ in_y = std::max(-1.f, std::min(in_y, static_cast<float>(height)));
+
+ // Clamp bounding box offsets to borders
+ x_from = ((in_x + x_from) < -1) ? -1 : x_from;
+ y_from = ((in_y + y_from) < -1) ? -1 : y_from;
+ x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to;
+ y_to = ((in_y + y_to) > height) ? (height - in_y) : y_to;
+
+ // Get pixel index
+ const int xi = std::floor(in_x);
+ const int yi = std::floor(in_y);
+
+ // Bounding box elements in each dimension
+ const int x_elements = (x_to - x_from + 1);
+ const int y_elements = (y_to - y_from + 1);
+ ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0);
+
+ // Sum pixels in area
+ int sum = 0;
+ for(int j = yi + y_from, je = yi + y_to; j <= je; ++j)
+ {
+ const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from;
+ sum = std::accumulate(ptr, ptr + x_elements, sum);
+ }
+
+ // Return average
+ return sum / (x_elements * y_elements);
+}
+} // namespace scale_helpers
+} // namespace arm_compute
+
+#endif /* SRC_CORE_HELPERS_SCALEHELPERS_H */
diff --git a/src/core/helpers/SoftmaxHelpers.cpp b/src/core/helpers/SoftmaxHelpers.cpp
new file mode 100644
index 0000000000..71b971af31
--- /dev/null
+++ b/src/core/helpers/SoftmaxHelpers.cpp
@@ -0,0 +1,45 @@
+/*
+* Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/helpers/SoftmaxHelpers.h"
+
+namespace arm_compute
+{
+namespace softmax_helpers
+{
+PermutationVector get_permutation_vector_from_softmax_axis(size_t axis)
+{
+ switch(axis)
+ {
+ case 1:
+ return PermutationVector(1U, 0U, 2U, 3U);
+ case 2:
+ return PermutationVector(2U, 1U, 0U, 3U);
+ case 3:
+ return PermutationVector(3U, 1U, 2U, 0U);
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
+ }
+}
+} // namespace softmax_helpers
+} // namespace arm_compute
diff --git a/src/core/helpers/SoftmaxHelpers.h b/src/core/helpers/SoftmaxHelpers.h
new file mode 100644
index 0000000000..de5490a14d
--- /dev/null
+++ b/src/core/helpers/SoftmaxHelpers.h
@@ -0,0 +1,50 @@
+/*
+* Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_SOFTMAXHELPERS_H
+#define SRC_CORE_HELPERS_SOFTMAXHELPERS_H
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace softmax_helpers
+{
+/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front
+ *
+ * @note This function assumes a tensor rank <= 4
+ *
+ * Axis selects the dimension on which softmax is performed.
+ * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5.
+ * Interally softmax kernels is always performed on the first dimension (front dimension), therefore permutation is
+ * required to put the dimension specified by @p axis to the first dimension.
+ *
+ * @param[in] axis Axis on which to perform softmax. Supported: 1, 2, 3 (0 implies no permutation needed)
+ *
+ * @return the permutation vector
+ */
+PermutationVector get_permutation_vector_from_softmax_axis(size_t axis);
+} // namespace softmax_helpers
+} // namespace arm_compute
+
+#endif /* SRC_CORE_HELPERS_SOFTMAXHELPERS_H */
diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h
new file mode 100644
index 0000000000..3c3b2b93f9
--- /dev/null
+++ b/src/core/helpers/Utils.h
@@ -0,0 +1,97 @@
+/*
+* Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_UTILS_H
+#define SRC_CORE_HELPERS_UTILS_H
+
+#include "arm_compute/core/ITensorInfo.h"
+
+namespace arm_compute
+{
+/** Create a strides object based on the provided strides and the tensor dimensions.
+ *
+ * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides.
+ * @param[in] stride_x Stride to be used in X dimension (in bytes).
+ * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes).
+ *
+ * @return Strides object based on the specified strides. Missing strides are
+ * calculated based on the tensor shape and the strides of lower dimensions.
+ */
+template <typename T, typename... Ts>
+inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides)
+{
+ const TensorShape &shape = info.tensor_shape();
+
+ // Create strides object
+ Strides strides(stride_x, fixed_strides...);
+
+ for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
+ {
+ strides.set(i, shape[i - 1] * strides[i - 1]);
+ }
+
+ return strides;
+}
+
+/** Create a strides object based on the tensor dimensions.
+ *
+ * @param[in] info Tensor info object used to compute the strides.
+ *
+ * @return Strides object based on element size and tensor shape.
+ */
+template <typename... Ts>
+inline Strides compute_strides(const ITensorInfo &info)
+{
+ return compute_strides(info, info.element_size());
+}
+
+/** Given an integer value, this function returns the next power of two
+ *
+ * @param[in] x Input value
+ *
+ * @return the next power of two
+ */
+inline unsigned int get_next_power_two(unsigned int x)
+{
+ // Decrement by 1
+ x--;
+
+ // Shift right by 1
+ x |= x >> 1u;
+ // Shift right by 2
+ x |= x >> 2u;
+ // Shift right by 4
+ x |= x >> 4u;
+ // Shift right by 8
+ x |= x >> 8u;
+ // Shift right by 16
+ x |= x >> 16u;
+
+ // Increment by 1
+ x++;
+
+ return x;
+}
+} // namespace arm_compute
+
+#endif /* SRC_CORE_HELPERS_UTILS_H */
diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp
new file mode 100644
index 0000000000..ba10eb9775
--- /dev/null
+++ b/src/core/helpers/WindowHelpers.cpp
@@ -0,0 +1,183 @@
+/*
+* Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/helpers/WindowHelpers.h"
+
+namespace arm_compute
+{
+Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
+{
+ if(!skip_border)
+ {
+ border_size = BorderSize(0);
+ }
+
+ const Coordinates &anchor = valid_region.anchor;
+ const TensorShape &shape = valid_region.shape;
+
+ Window window;
+
+ window.set(0, Window::Dimension(
+ // Skip the border left of the image
+ anchor[0] + border_size.left,
+ // Skip the border right of the image
+ // Make sure the window width is a multiple of the step size
+ anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]),
+ steps[0]));
+
+ size_t n = 1;
+
+ if(anchor.num_dimensions() > 1)
+ {
+ window.set(1, Window::Dimension(
+ // Skip the border above the image
+ anchor[1] + border_size.top,
+ // Skip the border below the image
+ anchor[1] + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]),
+ steps[1]));
+
+ ++n;
+ }
+
+ if(anchor.num_dimensions() > 2)
+ {
+ window.set(2, Window::Dimension(anchor[2], std::max<size_t>(1, shape[2]), steps[2]));
+
+ ++n;
+ }
+
+ for(; n < anchor.num_dimensions(); ++n)
+ {
+ window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
+ }
+
+ for(; n < Coordinates::num_max_dimensions; ++n)
+ {
+ window.set(n, Window::Dimension(0, 1));
+ }
+
+ return window;
+}
+
+Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size)
+{
+ const Coordinates &anchor = valid_region.anchor;
+ const TensorShape &shape = valid_region.shape;
+
+ Window window;
+
+ window.set(0, Window::Dimension(
+ // move the anchor to the start from the border
+ anchor[0] - border_size.left,
+ // move the anchor to include the right end border
+ // Make sure the window width is a multiple of the step size
+ anchor[0] - border_size.left + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]),
+ steps[0]));
+
+ size_t n = 1;
+
+ if(anchor.num_dimensions() > 1)
+ {
+ window.set(1, Window::Dimension(
+ // Include the border above the image
+ anchor[1] - border_size.top,
+ // Include the border below the image
+ anchor[1] - border_size.top + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]),
+ steps[1]));
+
+ ++n;
+ }
+
+ if(anchor.num_dimensions() > 2)
+ {
+ window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[n]), steps[2]));
+
+ ++n;
+ }
+
+ for(; n < anchor.num_dimensions(); ++n)
+ {
+ window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
+ }
+
+ for(; n < Coordinates::num_max_dimensions; ++n)
+ {
+ window.set(n, Window::Dimension(0, 1));
+ }
+
+ return window;
+}
+
+Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
+{
+ if(skip_border)
+ {
+ border_size.top = 0;
+ border_size.bottom = 0;
+ }
+ else
+ {
+ border_size.left = 0;
+ border_size.right = 0;
+ }
+
+ const Coordinates &anchor = valid_region.anchor;
+ const TensorShape &shape = valid_region.shape;
+
+ Window window;
+
+ window.set(0, Window::Dimension(
+ // Skip the border left of the image
+ anchor[0] + border_size.left,
+ // Skip the border right of the image
+ // Make sure the window width is a multiple of the step size
+ anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]),
+ steps[0]));
+
+ size_t n = 1;
+
+ if(anchor.num_dimensions() > 1)
+ {
+ window.set(1, Window::Dimension(
+ // Skip the border above the image
+ anchor[1] - border_size.top,
+ // Skip the border below the image
+ anchor[1] + shape[1] + border_size.bottom,
+ 1));
+
+ ++n;
+ }
+
+ for(; n < anchor.num_dimensions(); ++n)
+ {
+ window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
+ }
+
+ for(; n < Coordinates::num_max_dimensions; ++n)
+ {
+ window.set(n, Window::Dimension(0, 1));
+ }
+
+ return window;
+}
+} // namespace arm_compute
diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h
new file mode 100644
index 0000000000..9bc2135b6d
--- /dev/null
+++ b/src/core/helpers/WindowHelpers.h
@@ -0,0 +1,172 @@
+/*
+* Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_WINDOWHELPERS_H
+#define SRC_CORE_HELPERS_WINDOWHELPERS_H
+
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/Steps.h"
+#include "arm_compute/core/Window.h"
+
+namespace arm_compute
+{
+/** Update window and padding size for each of the access patterns.
+ *
+ * First the window size is reduced based on all access patterns that are not
+ * allowed to modify the padding of the underlying tensor. Then the padding of
+ * the remaining tensors is increased to match the window.
+ *
+ * @param[in] win Window that is used by the kernel.
+ * @param[in] patterns Access patterns used to calculate the final window and padding.
+ *
+ * @return True if the window has been changed. Changes to the padding do not
+ * influence the returned value.
+ */
+template <typename... Ts>
+bool update_window_and_padding(Window &win, Ts &&... patterns)
+{
+ bool window_changed = false;
+
+ utility::for_each([&](const IAccessWindow & w)
+ {
+ window_changed |= w.update_window_if_needed(win);
+ },
+ patterns...);
+
+ bool padding_changed = false;
+
+ utility::for_each([&](IAccessWindow & w)
+ {
+ padding_changed |= w.update_padding_if_needed(win);
+ },
+ patterns...);
+
+ return window_changed;
+}
+
+/** Intersect multiple valid regions.
+ *
+ * @param[in] regions Valid regions.
+ *
+ * @return Intersection of all regions.
+ */
+template <typename... Ts>
+ValidRegion intersect_valid_regions(const Ts &... regions)
+{
+ auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion
+ {
+ ValidRegion region;
+
+ for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
+ {
+ region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d]));
+ }
+
+ for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
+ {
+ region.shape.set(d, std::min(r1.shape[d], r2.shape[d]));
+ }
+
+ return region;
+ };
+
+ return utility::foldl(intersect, regions...);
+}
+
+#ifndef DOXYGEN_SKIP_THIS
+/** Calculate the maximum window for a given tensor shape and border setting
+ *
+ * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
+
+/** Calculate the maximum window for a given tensor shape and border setting
+ *
+ * @param[in] info Tensor info object defining the shape of the object for which the window is created.
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
+{
+ return calculate_max_window(info.valid_region(), steps, skip_border, border_size);
+}
+
+/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
+ *
+ * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size. The border region will be excluded from the window.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
+
+/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
+ *
+ * @param[in] info Tensor info object defining the shape of the object for which the window is created.
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
+{
+ return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size);
+}
+
+/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border.
+ *
+ * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] border_size (Optional) Border size. The border region will be included in the window.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize());
+
+/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border.
+ *
+ * @param[in] info Tensor info object defining the shape of the object for which the window is created.
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] border_size (Optional) Border size. The border region will be included in the window.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize())
+{
+ return calculate_max_enlarged_window(info.valid_region(), steps, border_size);
+}
+#endif /* DOXYGEN_SKIP_THIS */
+} // namespace arm_compute
+
+#endif /* SRC_CORE_HELPERS_WINDOWHELPERS_H */
diff --git a/src/core/utils/helpers/bit_ops.h b/src/core/utils/helpers/bit_ops.h
new file mode 100644
index 0000000000..ef60214c9f
--- /dev/null
+++ b/src/core/utils/helpers/bit_ops.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H
+#define ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H
+
+#include "support/Requires.h"
+
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace helpers
+{
+namespace bit_ops
+{
+/** Checks if the idx-th bit is set in an integral type
+ *
+ * @param[in] v Integral input
+ * @param[in] idx Index of the bit to check
+ *
+ * @return True if the idx-th bit is set else false
+ */
+template <typename T, REQUIRES_TA(std::is_integral<T>::value)>
+bool is_bit_set(T v, unsigned int idx)
+{
+ return (v & 1 << idx) != 0;
+}
+} // namespace bit_ops
+} // namespace helpers
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H */
diff --git a/src/core/utils/helpers/fft.cpp b/src/core/utils/helpers/fft.cpp
index 4c2f8fa494..64633c643d 100644
--- a/src/core/utils/helpers/fft.cpp
+++ b/src/core/utils/helpers/fft.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/utils/helpers/fft.h"
+#include "src/core/utils/helpers/fft.h"
#include <numeric>
diff --git a/src/core/utils/helpers/fft.h b/src/core/utils/helpers/fft.h
new file mode 100644
index 0000000000..f7b99dd7b8
--- /dev/null
+++ b/src/core/utils/helpers/fft.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_HELPERS_FFT_H
+#define ARM_COMPUTE_UTILS_HELPERS_FFT_H
+
+#include <set>
+#include <vector>
+
+namespace arm_compute
+{
+namespace helpers
+{
+namespace fft
+{
+/** Decompose a given 1D input size using the provided supported factors.
+ *
+ * @param[in] N Input size to be decomposed.
+ * @param[in] supported_factors Supported factors that can be used for decomposition.
+ *
+ * @return A vector with the stages of the decomposition. Will be empty if decomposition failed.
+ */
+std::vector<unsigned int> decompose_stages(unsigned int N, const std::set<unsigned int> &supported_factors);
+/** Calculate digit reverse index vector given fft size and the decomposed stages
+ *
+ * @param N Input size to calculate digit reverse for
+ * @param fft_stages A vector with the FFT decomposed stages
+ *
+ * @return A vector with the digit reverse indices. Will be empty if it failed.
+ */
+std::vector<unsigned int> digit_reverse_indices(unsigned int N, const std::vector<unsigned int> &fft_stages);
+} // namespace fft
+} // namespace helpers
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_UTILS_HELPERS_FFT_H */
diff --git a/src/core/utils/helpers/float_ops.h b/src/core/utils/helpers/float_ops.h
new file mode 100644
index 0000000000..a475a23b59
--- /dev/null
+++ b/src/core/utils/helpers/float_ops.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H
+#define ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H
+
+namespace arm_compute
+{
+namespace helpers
+{
+namespace float_ops
+{
+union RawFloat
+{
+ /** Constructor
+ *
+ * @param[in] val Floating-point value
+ */
+ explicit RawFloat(float val)
+ : f32(val)
+ {
+ }
+ /** Extract sign of floating point number
+ *
+ * @return Sign of floating point number
+ */
+ int32_t sign() const
+ {
+ return i32 >> 31;
+ }
+ /** Extract exponent of floating point number
+ *
+ * @return Exponent of floating point number
+ */
+ int32_t exponent() const
+ {
+ return (i32 >> 23) & 0xFF;
+ }
+ /** Extract mantissa of floating point number
+ *
+ * @return Mantissa of floating point number
+ */
+ int32_t mantissa() const
+ {
+ return i32 & 0x007FFFFF;
+ }
+
+ int32_t i32;
+ float f32;
+};
+
+/** Checks if two floating point numbers are equal given an allowed number of ULPs
+ *
+ * @param[in] a First number to compare
+ * @param[in] b Second number to compare
+ * @param[in] max_allowed_ulps (Optional) Number of allowed ULPs
+ *
+ * @return True if number is close else false
+ */
+inline bool is_equal_ulps(float a, float b, int max_allowed_ulps = 0)
+{
+ RawFloat ra(a);
+ RawFloat rb(b);
+
+ // Check ULP distance
+ const int ulps = std::abs(ra.i32 - rb.i32);
+ return ulps <= max_allowed_ulps;
+}
+
+/** Checks if the input floating point number is 1.0f checking if the difference is within a range defined with epsilon
+ *
+ * @param[in] a Input floating point number
+ * @param[in] epsilon (Optional) Epsilon used to define the error bounds
+ *
+ * @return True if number is close to 1.0f
+ */
+inline bool is_one(float a, float epsilon = 0.00001f)
+{
+ return std::abs(1.0f - a) <= epsilon;
+}
+
+/** Checks if the input floating point number is 0.0f checking if the difference is within a range defined with epsilon
+ *
+ * @param[in] a Input floating point number
+ * @param[in] epsilon (Optional) Epsilon used to define the error bounds
+ *
+ * @return True if number is close to 0.0f
+ */
+inline bool is_zero(float a, float epsilon = 0.00001f)
+{
+ return std::abs(0.0f - a) <= epsilon;
+}
+} // namespace float_ops
+} // namespace helpers
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H */
diff --git a/src/core/utils/helpers/tensor_info.h b/src/core/utils/helpers/tensor_info.h
new file mode 100644
index 0000000000..9279532e2a
--- /dev/null
+++ b/src/core/utils/helpers/tensor_info.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H
+#define ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H
+
+#include "arm_compute/core/ITensorInfo.h"
+
+namespace arm_compute
+{
+namespace helpers
+{
+namespace tensor_info
+{
+/** Checks if the quantization info of given tensors are different
+ *
+ * @param tensor_info_1 Tensor info of the first tensor
+ * @param tensor_info_2 Tensor info of the second tensor
+ * @param tensor_infos Tensor infos of the rest tensors
+ *
+ * @return True if tensors have mismatching quantization info else false.
+ */
+template <typename... Ts>
+inline bool tensors_have_different_quantization_info(const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+{
+ const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info();
+
+ const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } };
+ return std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
+ {
+ return tensor_info->quantization_info() != first_quantization_info;
+ });
+}
+} // namespace tensor_info
+} // namespace helpers
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H */
diff --git a/src/core/utils/helpers/tensor_transform.cpp b/src/core/utils/helpers/tensor_transform.cpp
index 84302ea19f..f2216995a9 100644
--- a/src/core/utils/helpers/tensor_transform.cpp
+++ b/src/core/utils/helpers/tensor_transform.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,7 @@
*/
#include "arm_compute/core/utils/helpers/tensor_transform.h"
-#include "arm_compute/core/utils/helpers/bit_ops.h"
+#include "bit_ops.h"
namespace arm_compute
{
diff --git a/src/graph/algorithms/TopologicalSort.cpp b/src/graph/algorithms/TopologicalSort.cpp
index 3647e13e92..3a69352471 100644
--- a/src/graph/algorithms/TopologicalSort.cpp
+++ b/src/graph/algorithms/TopologicalSort.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#include "arm_compute/graph/Graph.h"
-#include "arm_compute/core/utils/misc/Iterable.h"
+#include "support/Iterable.h"
#include <list>
#include <stack>
@@ -185,4 +185,4 @@ std::vector<NodeID> dfs(Graph &g)
return dfs_order_vector;
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 5f2f46f72a..cd732553be 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -23,12 +23,12 @@
*/
#include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "support/Cast.h"
using namespace arm_compute::utils::cast;
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index 8b2ecaf20e..8c1fedd93f 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -26,9 +26,9 @@
#include "arm_compute/graph/backends/ValidateHelpers.h"
#include "arm_compute/graph/nodes/Nodes.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "support/Cast.h"
using namespace arm_compute::utils::cast;
diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp
index ada0d686ed..b97d25890a 100644
--- a/src/graph/backends/CL/CLSubTensorHandle.cpp
+++ b/src/graph/backends/CL/CLSubTensorHandle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,7 @@
*/
#include "arm_compute/graph/backends/CL/CLSubTensorHandle.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
namespace arm_compute
{
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index 8ecb593e11..7d9d388ebe 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -23,11 +23,11 @@
*/
#include "arm_compute/graph/backends/GLES/GCFunctionFactory.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
+#include "support/Cast.h"
using namespace arm_compute::utils::cast;
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
index 159e51246a..13a93a2556 100644
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ b/src/graph/backends/GLES/GCNodeValidator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,8 +26,8 @@
#include "arm_compute/graph/backends/ValidateHelpers.h"
#include "arm_compute/graph/nodes/Nodes.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
+#include "support/Cast.h"
using namespace arm_compute::utils::cast;
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 61df45d8d0..95c6631830 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/graph/backends/NEON/NEFunctionFactory.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/Logger.h"
@@ -33,6 +32,7 @@
#include "arm_compute/graph/nodes/Nodes.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
+#include "support/Cast.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute::utils::cast;
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index 19c96eab50..63e8ff910f 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -26,9 +26,9 @@
#include "arm_compute/graph/backends/ValidateHelpers.h"
#include "arm_compute/graph/nodes/Nodes.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
+#include "support/Cast.h"
using namespace arm_compute::utils::cast;
diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp
index c8fc3f1ae2..4393156e8a 100644
--- a/src/graph/backends/NEON/NETensorHandle.cpp
+++ b/src/graph/backends/NEON/NETensorHandle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,8 @@
*/
#include "arm_compute/graph/backends/NEON/NETensorHandle.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "support/Cast.h"
namespace arm_compute
{
diff --git a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
index fd16625780..b45f453f23 100644
--- a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
+++ b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
@@ -33,7 +33,7 @@
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
#include <algorithm>
#include <map>
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index fa63f5625b..963b948432 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,8 +30,8 @@
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/core/utils/misc/Iterable.h"
+#include "support/Cast.h"
+#include "support/Iterable.h"
namespace arm_compute
{
diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp
index e3d3812c1d..b7c551ce8b 100644
--- a/src/graph/mutators/GroupedConvolutionMutator.cpp
+++ b/src/graph/mutators/GroupedConvolutionMutator.cpp
@@ -30,7 +30,7 @@
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/Nodes.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp
index 48bb9f7fc0..09a3cf50c0 100644
--- a/src/graph/mutators/NodeExecutionMethodMutator.cpp
+++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,7 @@
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/Nodes.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
namespace arm_compute
{
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 2a80825b36..1d47668cf2 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -30,7 +30,7 @@
#include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationNode.h"
#include "arm_compute/graph/nodes/Nodes.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
#include <set>
@@ -300,11 +300,12 @@ IGraphMutator::MutationType NodeFusionMutator::type() const
void NodeFusionMutator::mutate(Graph &g)
{
// Supported activations when fusing
- const std::set<Activation> supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
- Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU,
- Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU,
- Activation::RELU, Activation::SOFT_RELU, Activation::SQRT,
- Activation::SQUARE, Activation::TANH };
+ const std::set<Activation> supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
+ Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU,
+ Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU,
+ Activation::RELU, Activation::SOFT_RELU, Activation::SQRT,
+ Activation::SQUARE, Activation::TANH
+ };
// Preconditions
auto empty_prec = [](INode &)
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 359bba47ef..2c28a1a2d1 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -30,8 +30,8 @@
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/SplitLayerNode.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/core/utils/misc/Iterable.h"
+#include "support/Cast.h"
+#include "support/Iterable.h"
namespace arm_compute
{
diff --git a/src/graph/mutators/SyntheticDataTypeMutator.cpp b/src/graph/mutators/SyntheticDataTypeMutator.cpp
index dbbebdfb2b..532c0e821b 100644
--- a/src/graph/mutators/SyntheticDataTypeMutator.cpp
+++ b/src/graph/mutators/SyntheticDataTypeMutator.cpp
@@ -29,7 +29,7 @@
#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
#include <set>
diff --git a/src/runtime/CL/CLHelpers.cpp b/src/runtime/CL/CLHelpers.cpp
index adfdc3c917..5f1842f76d 100644
--- a/src/runtime/CL/CLHelpers.cpp
+++ b/src/runtime/CL/CLHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLRuntimeContext.h"
namespace
diff --git a/src/runtime/CL/CLMemory.cpp b/src/runtime/CL/CLMemory.cpp
index efbc68f50e..a1743c56e6 100644
--- a/src/runtime/CL/CLMemory.cpp
+++ b/src/runtime/CL/CLMemory.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/CL/CLMemory.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/CLRuntimeContext.cpp b/src/runtime/CL/CLRuntimeContext.cpp
index 2fc7f93adf..571e30931c 100644
--- a/src/runtime/CL/CLRuntimeContext.cpp
+++ b/src/runtime/CL/CLRuntimeContext.cpp
@@ -26,6 +26,8 @@
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
CLRuntimeContext::CLRuntimeContext()
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index 90d77883f6..f37fc779fe 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,8 @@
#include "arm_compute/runtime/CL/CLRuntimeContext.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
index ad6e7ba97b..57c4f685f6 100644
--- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
+++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
@@ -24,13 +24,14 @@
#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/Utils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/Utils.h"
namespace arm_compute
{
@@ -47,7 +48,7 @@ Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITen
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Invalid reduction operation");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast<int>(TensorShape::num_max_dimensions), "Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
- const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+ const unsigned int num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
DataType output_data_type = DataType::S32;
TensorInfo not_reshaped_output;
@@ -115,7 +116,7 @@ void CLArgMinMaxLayer::configure(const ICLTensor *input, int axis, ICLTensor *ou
void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
+ _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
_reduction_axis = axis;
const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false);
@@ -172,4 +173,4 @@ void CLArgMinMaxLayer::run()
}
_reshape.run();
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp
index 4214813446..2eb310b893 100644
--- a/src/runtime/CL/functions/CLConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp
@@ -36,6 +36,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
index 4c787673b5..b291ae5b88 100644
--- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
+++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
@@ -23,6 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
void CLConvertFullyConnectedWeights::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape,
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 630352e4e6..85355f0f17 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -30,6 +30,8 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
+
#include <cmath>
#include <memory>
#include <tuple>
diff --git a/src/runtime/CL/functions/CLCropResize.cpp b/src/runtime/CL/functions/CLCropResize.cpp
index 529f7bfb3e..6167e9de0a 100644
--- a/src/runtime/CL/functions/CLCropResize.cpp
+++ b/src/runtime/CL/functions/CLCropResize.cpp
@@ -25,6 +25,10 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include "support/MemorySupport.h"
#include <cstddef>
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index cd55336d9a..e6717b6d01 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -29,6 +29,8 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
+
#include <cmath>
#include <memory>
#include <tuple>
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index c1055dda36..07e7a18941 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -43,7 +43,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig
}
void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
- const PadStrideInfo &conv_info,
+ const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info)
{
// Set GPU target
diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
index 3515c25d82..0ffafa0221 100644
--- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <memory>
#include <tuple>
diff --git a/src/runtime/CL/functions/CLFFT1D.cpp b/src/runtime/CL/functions/CLFFT1D.cpp
index 7d15d33ab5..1269cba90d 100644
--- a/src/runtime/CL/functions/CLFFT1D.cpp
+++ b/src/runtime/CL/functions/CLFFT1D.cpp
@@ -25,8 +25,8 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/helpers/fft.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/utils/helpers/fft.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
index 1def674bb6..4d0eab81ee 100644
--- a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
@@ -26,10 +26,13 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/helpers/fft.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/utils/helpers/fft.h"
+
+#include "support/MemorySupport.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLFill.cpp b/src/runtime/CL/functions/CLFill.cpp
index 6c0f1786f0..a89383ec31 100644
--- a/src/runtime/CL/functions/CLFill.cpp
+++ b/src/runtime/CL/functions/CLFill.cpp
@@ -26,6 +26,8 @@
#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
+#include "support/MemorySupport.h"
+
#include <utility>
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 4f365b6a61..75e87c382b 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -25,10 +25,10 @@
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/Cast.h"
#include "support/MemorySupport.h"
#include <algorithm>
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index d56b341abf..ccae6713a6 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -23,10 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Helpers.h"
@@ -35,12 +32,18 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/helpers/float_ops.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/ICLGEMMKernelConfiguration.h"
+#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h"
+#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/utils/helpers/float_ops.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
+#include "support/Cast.h"
+
+#include "support/MemorySupport.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index ee90b39c2b..e871b39805 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -27,10 +27,11 @@
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "support/Cast.h"
#include <cmath>
#include <memory>
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 30dce5b8fe..7a8de6c1f5 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -24,8 +24,6 @@
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
@@ -35,7 +33,10 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h"
+#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h"
+#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index 45dc402449..5291de074a 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Types.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
index fce1fe43a2..4a60ee9d08 100644
--- a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
@@ -26,6 +26,8 @@
#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
CLInstanceNormalizationLayer::CLInstanceNormalizationLayer()
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index e30b1dbb86..76a531b1c9 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <memory>
diff --git a/src/runtime/CL/functions/CLPriorBoxLayer.cpp b/src/runtime/CL/functions/CLPriorBoxLayer.cpp
index 1907c7cc08..fefbff639d 100644
--- a/src/runtime/CL/functions/CLPriorBoxLayer.cpp
+++ b/src/runtime/CL/functions/CLPriorBoxLayer.cpp
@@ -31,6 +31,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
+
using namespace arm_compute;
CLPriorBoxLayer::CLPriorBoxLayer()
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 15a54c7928..c493471667 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp
index 0e2ede7167..4ea7f7642f 100644
--- a/src/runtime/CL/functions/CLReduceMean.cpp
+++ b/src/runtime/CL/functions/CLReduceMean.cpp
@@ -23,12 +23,13 @@
*/
#include "arm_compute/runtime/CL/functions/CLReduceMean.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 54e91fb8d8..208371c45d 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -30,7 +30,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/Utils.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/Utils.h"
+
#include "support/MemorySupport.h"
namespace arm_compute
@@ -47,7 +49,7 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
- const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+ const unsigned int num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
const bool is_serial = needs_serialized_reduction(op, input->data_type(), axis);
const bool is_reshape_required = !keep_dims;
@@ -194,7 +196,7 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
void CLReductionOperation::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
+ _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
_reduction_axis = axis;
_is_serial = needs_serialized_reduction(op, input->info()->data_type(), axis);
_is_reshape_required = !keep_dims;
diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp
index 60b72c5f87..1e3d614402 100644
--- a/src/runtime/CL/functions/CLRemap.cpp
+++ b/src/runtime/CL/functions/CLRemap.cpp
@@ -42,7 +42,7 @@ void CLRemap::configure(ICLTensor *input, const ICLTensor *map_x, const ICLTenso
void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy,
BorderMode border_mode,
- uint8_t constant_border_value)
+ uint8_t constant_border_value)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
diff --git a/src/runtime/CL/functions/CLSelect.cpp b/src/runtime/CL/functions/CLSelect.cpp
index c7d7df75d2..ef8010847b 100644
--- a/src/runtime/CL/functions/CLSelect.cpp
+++ b/src/runtime/CL/functions/CLSelect.cpp
@@ -27,6 +27,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
+
using namespace arm_compute;
namespace arm_compute
diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
index 720f9111a5..759c8706a1 100644
--- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp
+++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/SoftmaxHelpers.h"
namespace arm_compute
{
@@ -63,7 +64,7 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure(const CLCompileContext &compile_co
{
_memory_group.manage(&_input_permuted);
_memory_group.manage(&_output_permuted);
- _permute_input.configure(compile_context, input, &_input_permuted, get_permutation_vector_from_softmax_axis(actual_axis));
+ _permute_input.configure(compile_context, input, &_input_permuted, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
tmp_output = &_output_permuted;
}
@@ -99,7 +100,7 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure(const CLCompileContext &compile_co
_sum.allocator()->allocate();
if(_needs_permute)
{
- _permute_output.configure(compile_context, &_output_permuted, output, get_permutation_vector_from_softmax_axis(actual_axis));
+ _permute_output.configure(compile_context, &_output_permuted, output, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
_input_permuted.allocator()->allocate();
_output_permuted.allocator()->allocate();
}
@@ -117,7 +118,7 @@ Status CLSoftmaxLayerGeneric<IS_LOG>::validate(const ITensorInfo *input, const I
const bool needs_permute = actual_axis != 0;
if(needs_permute)
{
- const PermutationVector permutation_vector = get_permutation_vector_from_softmax_axis(actual_axis);
+ const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(*input, permutation_vector);
TensorInfo input_permuted(input->clone()->set_tensor_shape(permuted_shape));
ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(input, &input_permuted, permutation_vector));
diff --git a/src/runtime/CL/functions/CLSplit.cpp b/src/runtime/CL/functions/CLSplit.cpp
index db0b14b9a2..0b27371e3f 100644
--- a/src/runtime/CL/functions/CLSplit.cpp
+++ b/src/runtime/CL/functions/CLSplit.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 09a35a6f27..7ad017f918 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -102,7 +102,7 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
}
void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
- const PadStrideInfo &conv_info,
+ const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info, bool enable_fast_math)
{
// Get indices for the width and height
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelection.h b/src/runtime/CL/gemm/CLGEMMKernelSelection.h
new file mode 100644
index 0000000000..f6fad7e4ff
--- /dev/null
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelection.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CLGEMMKERNELSELECTION_H
+#define SRC_CLGEMMKERNELSELECTION_H
+
+#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h"
+
+#include "support/MemorySupport.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** CLGEMMKernelSelection factory class */
+class CLGEMMKernelSelectionFactory final
+{
+public:
+ /** Static method to select the GEMM kernel accordingly with the GPU target and GEMM's dimensionality
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return CLGEMMKernelSelection class
+ */
+ static std::unique_ptr<ICLGEMMKernelSelection> create(GPUTarget gpu)
+ {
+ switch(get_arch_from_target(gpu))
+ {
+ case GPUTarget::MIDGARD:
+ return support::cpp14::make_unique<CLGEMMKernelSelectionMidgard>(gpu);
+ case GPUTarget::BIFROST:
+ return support::cpp14::make_unique<CLGEMMKernelSelectionBifrost>(gpu);
+ case GPUTarget::VALHALL:
+ return support::cpp14::make_unique<CLGEMMKernelSelectionValhall>(gpu);
+ default:
+ ARM_COMPUTE_ERROR("Not supported GPU target");
+ }
+ }
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /* SRC_CLGEMMKERNELSELECTION_H */
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
index b1dd690ca5..73b90568f5 100644
--- a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
@@ -21,11 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
new file mode 100644
index 0000000000..a495b48301
--- /dev/null
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CLGEMMKERNELSELECTIONBIFROST_H
+#define SRC_CLGEMMKERNELSELECTIONBIFROST_H
+
+#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Bifrost based OpenCL GEMMKernel selection */
+class CLGEMMKernelSelectionBifrost final : public ICLGEMMKernelSelection
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMKernelSelectionBifrost(GPUTarget gpu);
+
+ // Inherited overridden method
+ CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) override;
+
+private:
+ CLGEMMKernelType g76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType g71_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /* SRC_CLGEMMKERNELSELECTIONBIFROST_H */
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp
index 324c2f7dca..d172a827b5 100644
--- a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h
new file mode 100644
index 0000000000..3f6003f7dc
--- /dev/null
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CLGEMMKERNELSELECTIONMIDGARD_H
+#define SRC_CLGEMMKERNELSELECTIONMIDGARD_H
+
+#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Midgard based OpenCL GEMMKernel selection */
+class CLGEMMKernelSelectionMidgard final : public ICLGEMMKernelSelection
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMKernelSelectionMidgard(GPUTarget gpu);
+
+ // Inherited overridden method
+ CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) override;
+
+private:
+ CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /* SRC_CLGEMMKERNELSELECTIONMIDGARD_H */
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp
index c50c7ae76b..acae0e7565 100644
--- a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp
@@ -21,11 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h"
+#include "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
+#include "src/core/CL/gemm/CLGEMMHelpers.h"
#include <map>
#include <utility>
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h
new file mode 100644
index 0000000000..cbea9ea548
--- /dev/null
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CLGEMMKERNELSELECTIONVALHALL_H
+#define SRC_CLGEMMKERNELSELECTIONVALHALL_H
+
+#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Valhall based OpenCL GEMMKernel selection */
+class CLGEMMKernelSelectionValhall final : public ICLGEMMKernelSelection
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMKernelSelectionValhall(GPUTarget gpu);
+
+ // Inherited overridden method
+ CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) override;
+
+private:
+ CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /* SRC_CLGEMMKERNELSELECTIONVALHALL_H */
diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp
index 52644bf192..a6474c9835 100644
--- a/src/runtime/CL/tuners/BifrostTuner.cpp
+++ b/src/runtime/CL/tuners/BifrostTuner.cpp
@@ -25,7 +25,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernels.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp
index e49e15508b..58b0d579d2 100644
--- a/src/runtime/CL/tuners/MidgardTuner.cpp
+++ b/src/runtime/CL/tuners/MidgardTuner.cpp
@@ -25,7 +25,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernels.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
namespace arm_compute
{
diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp
index f017006de7..e6b0ec20b8 100644
--- a/src/runtime/CPP/CPPScheduler.cpp
+++ b/src/runtime/CPP/CPPScheduler.cpp
@@ -27,7 +27,8 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CPUUtils.h"
+#include "src/runtime/CPUUtils.h"
+#include "support/MemorySupport.h"
#include "support/Mutex.h"
#include <atomic>
diff --git a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp
index 9d62733384..fdb4c9f0f6 100644
--- a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp
+++ b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <list>
diff --git a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp
index 3507a3ac45..31f1fafd69 100644
--- a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp
+++ b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <cstddef>
#include <ios>
diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp
index 4d6caaee01..a7dd464540 100644
--- a/src/runtime/CPUUtils.cpp
+++ b/src/runtime/CPUUtils.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CPUUtils.h"
+#include "src/runtime/CPUUtils.h"
#include "arm_compute/core/CPP/CPPTypes.h"
#include "arm_compute/core/Error.h"
@@ -352,6 +352,10 @@ int get_max_cpus()
namespace arm_compute
{
+namespace utils
+{
+namespace cpu
+{
void get_cpu_configuration(CPUInfo &cpuinfo)
{
#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
@@ -460,5 +464,6 @@ unsigned int get_threads_hint()
return num_threads_hint;
}
-
+} // namespace cpu
+} // namespace utils
} // namespace arm_compute
diff --git a/src/runtime/CPUUtils.h b/src/runtime/CPUUtils.h
new file mode 100644
index 0000000000..452d3d58ca
--- /dev/null
+++ b/src/runtime/CPUUtils.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_RUNTIME_CPU_UTILS_H
+#define ARM_COMPUTE_RUNTIME_CPU_UTILS_H
+
+namespace arm_compute
+{
+class CPUInfo;
+
+namespace utils
+{
+namespace cpu
+{
+/** This function will try to detect the CPU configuration on the system and will fill
+ * the cpuinfo object accordingly to reflect this.
+ *
+ * @param[out] cpuinfo @ref CPUInfo to be used to hold the system's cpu configuration.
+ */
+void get_cpu_configuration(CPUInfo &cpuinfo);
+/** Some systems have both big and small cores, this fuction computes the minimum number of cores
+ * that are exactly the same on the system. To maximize performance the library attempts to process
+ * workloads concurrently using as many threads as big cores are available on the system.
+ *
+ * @return The minumum number of common cores.
+ */
+unsigned int get_threads_hint();
+} // namespace cpu
+} // namespace utils
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_RUNTIME_CPU_UTILS_H */
diff --git a/src/runtime/DeviceProperties.cpp b/src/runtime/DeviceProperties.cpp
index 5d7ae020d7..ec9f4a16ed 100644
--- a/src/runtime/DeviceProperties.cpp
+++ b/src/runtime/DeviceProperties.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,12 @@
*/
#include "arm_compute/runtime/DeviceProperties.h"
-#include "arm_compute/runtime/CPUUtils.h"
+#include "src/runtime/CPUUtils.h"
namespace arm_compute
{
DeviceProperties::DeviceProperties()
{
- get_cpu_configuration(cpu_info);
+ utils::cpu::get_cpu_configuration(cpu_info);
}
} // namespace arm_compute
diff --git a/src/runtime/GLES_COMPUTE/GCMemory.cpp b/src/runtime/GLES_COMPUTE/GCMemory.cpp
index 998f8a5cc4..4d74555f4e 100644
--- a/src/runtime/GLES_COMPUTE/GCMemory.cpp
+++ b/src/runtime/GLES_COMPUTE/GCMemory.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/GLES_COMPUTE/GCMemory.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryRegion.h"
+#include "support/Cast.h"
namespace arm_compute
{
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp
index 9e23974b8d..807412eb17 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp
@@ -29,6 +29,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/IScheduler.cpp b/src/runtime/IScheduler.cpp
index 53df3699b0..43df3d5e23 100644
--- a/src/runtime/IScheduler.cpp
+++ b/src/runtime/IScheduler.cpp
@@ -26,17 +26,17 @@
#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/CPUUtils.h"
-#include "arm_compute/runtime/SchedulerUtils.h"
+#include "src/runtime/CPUUtils.h"
+#include "src/runtime/SchedulerUtils.h"
namespace arm_compute
{
IScheduler::IScheduler()
: _cpu_info()
{
- get_cpu_configuration(_cpu_info);
+ utils::cpu::get_cpu_configuration(_cpu_info);
// Work out the best possible number of execution threads
- _num_threads_hint = get_threads_hint();
+ _num_threads_hint = utils::cpu::get_threads_hint();
}
CPUInfo &IScheduler::cpu_info()
@@ -74,7 +74,7 @@ void IScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, ITensor
//in c++17 this can be swapped for auto [ m_threads, n_threads ] = split_2d(...
unsigned m_threads, n_threads;
- std::tie(m_threads, n_threads) = split_2d(this->num_threads(), m, n);
+ std::tie(m_threads, n_threads) = scheduler_utils::split_2d(this->num_threads(), m, n);
std::vector<IScheduler::Workload> workloads;
for(unsigned int ni = 0; ni != n_threads; ++ni)
diff --git a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp
index 82316c49c6..f2181e0a74 100644
--- a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp
+++ b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/Utils.h"
+#include "src/runtime/Utils.h"
namespace arm_compute
{
@@ -36,6 +36,6 @@ INESimpleFunctionNoBorder::INESimpleFunctionNoBorder(IRuntimeContext *ctx)
void INESimpleFunctionNoBorder::run()
{
- schedule_kernel_on_ctx(_ctx, _kernel.get(), Window::DimY);
+ utils::schedule_kernel_on_ctx(_ctx, _kernel.get(), Window::DimY);
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
index 0664d3c9d5..70bbba62ad 100644
--- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
+++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
index 5a593e9c74..eab40ac5be 100644
--- a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/MemorySupport.h"
+
using namespace arm_compute;
NEBatchNormalizationLayer::NEBatchNormalizationLayer()
diff --git a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp
index c06a8aa0e0..2705cffe68 100644
--- a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp
+++ b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
void NEBatchToSpaceLayer::configure(const ITensor *input, const ITensor *block_shape, ITensor *output)
diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
index 8df4f4cb62..72bd9e6b19 100644
--- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp
+++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
@@ -35,6 +35,7 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp
index f6ed2ec250..f8f99169aa 100644
--- a/src/runtime/NEON/functions/NECropResize.cpp
+++ b/src/runtime/NEON/functions/NECropResize.cpp
@@ -25,6 +25,8 @@
#include "arm_compute/runtime/NEON/functions/NECropResize.h"
+#include "support/MemorySupport.h"
+
#include <cstddef>
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index dff3070239..cb9ab168a7 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp
index e363f89482..0aaa37ec92 100644
--- a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
void NEDepthToSpaceLayer::configure(const ITensor *input, ITensor *output, int32_t block_shape)
diff --git a/src/runtime/NEON/functions/NEFFT1D.cpp b/src/runtime/NEON/functions/NEFFT1D.cpp
index 744a91521f..2c53b185df 100644
--- a/src/runtime/NEON/functions/NEFFT1D.cpp
+++ b/src/runtime/NEON/functions/NEFFT1D.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,8 +25,8 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/helpers/fft.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/utils/helpers/fft.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp
index cd68788145..a46fc9f45f 100644
--- a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,8 +26,11 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/helpers/fft.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/utils/helpers/fft.h"
+
+#include "support/MemorySupport.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 4dcf41e360..d956d16f4d 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -30,6 +30,8 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/MemorySupport.h"
+
#include <algorithm>
#include <cmath>
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 3b8ca44ed7..4166cff97a 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -34,6 +33,8 @@
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <cmath>
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index ad349cb635..5b0848398d 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -23,13 +23,13 @@
*/
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "src/core/NEON/kernels/assembly/arm_gemm.hpp"
-
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h"
-
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
#include "src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h"
+#include "src/core/NEON/kernels/assembly/arm_gemm.hpp"
+
+#include "support/MemorySupport.h"
#include <arm_neon.h>
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 83db146a8a..36357dde41 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -33,6 +33,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
index 3d5377892a..13210a06cd 100644
--- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
+++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
index 11989d3225..7610d15787 100644
--- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <cmath>
#include <memory>
diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp
index 21c349ba95..03c597a3bf 100644
--- a/src/runtime/NEON/functions/NEPadLayer.cpp
+++ b/src/runtime/NEON/functions/NEPadLayer.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp
index fda130bf69..bcf6bef9c7 100644
--- a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp
+++ b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,8 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/MemorySupport.h"
+
namespace arm_compute
{
void NEPriorBoxLayer::configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info)
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index 5a6b51337a..95f20ae1a9 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp
index 021f7b530a..c3c5529c09 100644
--- a/src/runtime/NEON/functions/NEReduceMean.cpp
+++ b/src/runtime/NEON/functions/NEReduceMean.cpp
@@ -23,11 +23,12 @@
*/
#include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 91176bfa45..4938a56b3f 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 2278f07a1c..bbf8343c2b 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -30,12 +30,14 @@
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Rounding.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/utils/ScaleUtils.h"
+#include "support/MemorySupport.h"
+#include "support/Rounding.h"
+
#include <cmath>
#include <cstddef>
#include <utility>
diff --git a/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp b/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp
index b0cafae520..d165b2235c 100644
--- a/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp
+++ b/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h"
+#include "src/runtime/NEON/functions/NESimpleAssemblyFunction.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
diff --git a/src/runtime/NEON/functions/NESimpleAssemblyFunction.h b/src/runtime/NEON/functions/NESimpleAssemblyFunction.h
new file mode 100644
index 0000000000..e9be54d35f
--- /dev/null
+++ b/src/runtime/NEON/functions/NESimpleAssemblyFunction.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H
+#define ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H
+
+#include "arm_compute/runtime/IFunction.h"
+#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic interface for functions which have a single NEON GEMM wrapper kernel to run */
+class NESimpleAssemblyFunction : public IFunction
+{
+public:
+ /** Constructor */
+ NESimpleAssemblyFunction();
+
+ /** Configure the function with the kernel to run
+ *
+ * @param[in] kernel GEMM Wrapper kernel configured and ready to run
+ *
+ * @note The kernel is expected to have a 1D window. The function will multi-thread this window across the X dimension.
+ */
+ void configure(std::unique_ptr<INEGEMMWrapperKernel> kernel);
+
+ // Inherited methods overridden:
+ void run() override final;
+
+protected:
+ std::unique_ptr<INEGEMMWrapperKernel> _kernel; /**< Kernel to run */
+};
+} //namespace arm_compute
+#endif /*ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H */
diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
index e763caa3a3..4f773861d2 100644
--- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/SoftmaxHelpers.h"
namespace arm_compute
{
@@ -53,7 +54,7 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f
// Add to the memory manager _input_permuted
_memory_group.manage(&_input_permuted);
- _permute_input.configure(input, &_input_permuted, get_permutation_vector_from_softmax_axis(actual_axis));
+ _permute_input.configure(input, &_input_permuted, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
}
// We want to deal with a 2D input. Either it is the permuted version of the original input (4D case)
@@ -87,7 +88,7 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f
_input_permuted.allocator()->allocate();
// Re-permute the permuted output into the requested (4D) output
- _permute_output.configure(&_output_permuted, output, get_permutation_vector_from_softmax_axis(actual_axis));
+ _permute_output.configure(&_output_permuted, output, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
// Allocate the intermediate permuted tensors
_output_permuted.allocator()->allocate();
@@ -128,7 +129,7 @@ Status NESoftmaxLayerGeneric<IS_LOG>::validate(const ITensorInfo *input, const I
if(needs_permute)
{
- const PermutationVector permutation_vector = get_permutation_vector_from_softmax_axis(actual_axis);
+ const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(*input, permutation_vector);
TensorInfo input_permuted(input->clone()->set_tensor_shape(permuted_shape));
ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &input_permuted, permutation_vector));
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index 1bad310640..23b9f60c38 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -23,17 +23,17 @@
*/
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
+#include "src/core/CPP/Validate.h"
#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
#include "support/MemorySupport.h"
-#include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp"
+#include "src/core/NEON/kernels/convolution/common/utils.hpp"
#include "src/core/NEON/kernels/convolution/winograd/winograd.hpp"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
index 73a7caac8b..11e89cb23b 100644
--- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
@@ -24,18 +24,21 @@
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
-#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h"
-#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp"
-#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h"
+#include "src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp"
+#include "src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp"
+#include "src/core/helpers/AutoConfiguration.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/MemorySupport.h"
+
#include <set>
namespace arm_compute
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index 4c2f03a53a..bf34b0114b 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -27,7 +27,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CPUUtils.h"
+#include "src/runtime/CPUUtils.h"
#include <omp.h>
namespace arm_compute
diff --git a/src/runtime/SchedulerUtils.cpp b/src/runtime/SchedulerUtils.cpp
index 1c12e3ce58..6f9a32c879 100644
--- a/src/runtime/SchedulerUtils.cpp
+++ b/src/runtime/SchedulerUtils.cpp
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "src/runtime/SchedulerUtils.h"
#include "arm_compute/core/Error.h"
@@ -28,6 +29,8 @@
namespace arm_compute
{
+namespace scheduler_utils
+{
#ifndef BARE_METAL
std::pair<unsigned, unsigned> split_2d(unsigned max_threads, std::size_t m, std::size_t n)
{
@@ -76,4 +79,5 @@ std::pair<unsigned, unsigned> split_2d(unsigned max_threads, std::size_t m, std:
}
}
#endif /* #ifndef BARE_METAL */
+} // namespace scheduler_utils
} // namespace arm_compute
diff --git a/src/runtime/SchedulerUtils.h b/src/runtime/SchedulerUtils.h
new file mode 100644
index 0000000000..46644a369e
--- /dev/null
+++ b/src/runtime/SchedulerUtils.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_COMPUTE_SCHEDULER_UTILS_H
+#define SRC_COMPUTE_SCHEDULER_UTILS_H
+
+#include <cstddef>
+#include <utility>
+
+namespace arm_compute
+{
+namespace scheduler_utils
+{
+/** Given two dimensions and a maximum number of threads to utilise, calculate the best
+ * combination of threads that fit in (multiplied together) max_threads.
+ *
+ * This algorithm assumes that work in either of the dimensions is equally difficult
+ * to compute
+ *
+ * @returns [m_nthreads, n_nthreads] A pair of the threads that should be used in each dimension
+ */
+std::pair<unsigned, unsigned> split_2d(unsigned max_threads, std::size_t m, std::size_t n);
+} // namespace scheduler_utils
+} // namespace arm_compute
+#endif /* SRC_COMPUTE_SCHEDULER_UTILS_H */
diff --git a/src/runtime/Utils.cpp b/src/runtime/Utils.cpp
index 534b421f8a..15e9d43a49 100644
--- a/src/runtime/Utils.cpp
+++ b/src/runtime/Utils.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/Utils.h"
+#include "src/runtime/Utils.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -31,6 +31,8 @@
namespace arm_compute
{
+namespace utils
+{
#ifndef DOXYGEN_SKIP_THIS
static const std::string information =
#include "arm_compute_version.embed"
@@ -78,4 +80,5 @@ unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, un
const unsigned int num_of_stages = num_of_wg / 128 + 2;
return num_of_stages;
}
+} // namespace utils
} // namespace arm_compute
diff --git a/src/runtime/Utils.h b/src/runtime/Utils.h
new file mode 100644
index 0000000000..f8775c9612
--- /dev/null
+++ b/src/runtime/Utils.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_UTILS_H
+#define SRC_RUNTIME_UTILS_H
+
+#include "arm_compute/runtime/IRuntimeContext.h"
+#include "arm_compute/runtime/Scheduler.h"
+
+#include <string>
+
+namespace arm_compute
+{
+namespace utils
+{
+/** Convert a Scheduler::Type into a string.
+ *
+ * @param[in] t @ref Scheduler::Type to be translated to string.
+ *
+ * @return The string describing the scheduler type.
+ */
+const std::string &string_from_scheduler_type(Scheduler::Type t);
+
+/** Schedules a kernel using the context if not nullptr else uses the legacy scheduling flow.
+ *
+ * @param[in] ctx Context to use.
+ * @param[in] kernel Kernel to schedule.
+ * @param[in] hints Hints to use.
+ */
+void schedule_kernel_on_ctx(IRuntimeContext *ctx, ICPPKernel *kernel, const IScheduler::Hints &hints);
+
+/** Calculate number of stages for parallel implementations
+ *
+ * @param[in] input_x_dimension input tensor x dimension
+ * @param[in] axis axis to be used
+ */
+unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, unsigned int axis);
+} // namespace utils
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_UTILS_H */