From 68dd25fbe6e4d3c3513fa5993863419769aa08fc Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Mon, 19 Oct 2020 16:00:11 +0100 Subject: COMPMID-3637: Move utility headers from arm_compute to src Signed-off-by: Georgios Pinitas Change-Id: If9d6fa8c900b68c4b6fd373f2fc1f9abb83ea917 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4145 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Comments-Addressed: Arm Jenkins --- src/core/NEON/NETracePoint.cpp | 4 +- .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp | 2 + src/core/NEON/kernels/NEAccumulateKernel.cpp | 4 +- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 4 +- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 4 +- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 4 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 2 + .../kernels/NEBatchNormalizationLayerKernel.cpp | 4 +- .../NEON/kernels/NEBatchToSpaceLayerKernel.cpp | 2 + src/core/NEON/kernels/NEBitwiseAndKernel.cpp | 2 + src/core/NEON/kernels/NEBitwiseNotKernel.cpp | 4 +- src/core/NEON/kernels/NEBitwiseOrKernel.cpp | 4 +- src/core/NEON/kernels/NEBitwiseXorKernel.cpp | 4 +- .../NEON/kernels/NEBoundingBoxTransformKernel.cpp | 6 +- src/core/NEON/kernels/NEBox3x3Kernel.cpp | 5 +- src/core/NEON/kernels/NECannyEdgeKernel.cpp | 8 +- src/core/NEON/kernels/NEChannelCombineKernel.cpp | 4 +- src/core/NEON/kernels/NEChannelExtractKernel.cpp | 4 +- .../NEON/kernels/NEChannelShuffleLayerKernel.cpp | 6 +- src/core/NEON/kernels/NECol2ImKernel.cpp | 4 +- src/core/NEON/kernels/NEColorConvertKernel.cpp | 2 + .../NEConvertFullyConnectedWeightsKernel.cpp | 4 +- .../kernels/NEConvertQuantizedSignednessKernel.cpp | 2 + src/core/NEON/kernels/NEConvolutionKernel.cpp | 4 +- src/core/NEON/kernels/NECopyKernel.cpp | 4 +- src/core/NEON/kernels/NECropKernel.cpp | 9 +- .../kernels/NECumulativeDistributionKernel.cpp | 4 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 2 + .../NEON/kernels/NEDepthConvertLayerKernel.cpp | 7 +- 
.../NEON/kernels/NEDepthToSpaceLayerKernel.cpp | 3 + .../NEDepthwiseConvolutionLayerNativeKernel.cpp | 7 +- .../NEON/kernels/NEDequantizationLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEDerivativeKernel.cpp | 4 +- src/core/NEON/kernels/NEDilateKernel.cpp | 2 + .../kernels/NEDirectConvolutionLayerKernel.cpp | 6 +- .../NEDirectConvolutionLayerOutputStageKernel.cpp | 6 +- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 4 +- src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp | 4 +- src/core/NEON/kernels/NEErodeKernel.cpp | 2 + src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp | 4 +- src/core/NEON/kernels/NEFFTRadixStageKernel.cpp | 7 +- src/core/NEON/kernels/NEFFTScaleKernel.cpp | 2 + src/core/NEON/kernels/NEFastCornersKernel.cpp | 4 +- src/core/NEON/kernels/NEFillArrayKernel.cpp | 1 + src/core/NEON/kernels/NEFillBorderKernel.cpp | 3 +- src/core/NEON/kernels/NEFlattenLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEFloorKernel.cpp | 4 +- .../kernels/NEFuseBatchNormalizationKernel.cpp | 4 +- .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 2 + .../kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 6 +- .../kernels/NEGEMMLowpOffsetContributionKernel.cpp | 4 +- ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 4 +- .../NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 4 +- ...tizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 4 +- ...ntizeDownInt32ToInt8ScaleByFixedPointKernel.cpp | 4 +- ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 4 +- .../NEON/kernels/NEGEMMLowpReductionKernel.cpp | 4 +- .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 4 +- .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 7 +- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 4 +- src/core/NEON/kernels/NEGatherKernel.cpp | 6 +- src/core/NEON/kernels/NEGaussian3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEGaussian5x5Kernel.cpp | 4 +- src/core/NEON/kernels/NEGaussianPyramidKernel.cpp | 4 +- .../kernels/NEGenerateProposalsLayerKernel.cpp | 8 +- src/core/NEON/kernels/NEHOGDescriptorKernel.cpp | 4 +- 
src/core/NEON/kernels/NEHOGDetectorKernel.cpp | 4 +- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 4 +- .../kernels/NEHeightConcatenateLayerKernel.cpp | 2 + src/core/NEON/kernels/NEHistogramKernel.cpp | 4 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 4 +- .../kernels/NEInstanceNormalizationLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEIntegralImageKernel.cpp | 4 +- src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 2 + src/core/NEON/kernels/NELKTrackerKernel.cpp | 6 +- .../NELocallyConnectedMatrixMultiplyKernel.cpp | 4 +- src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 4 +- .../NEON/kernels/NEMaxUnpoolingLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 4 +- .../kernels/NEMeanStdDevNormalizationKernel.cpp | 4 +- src/core/NEON/kernels/NEMedian3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEMemsetKernel.cpp | 6 +- src/core/NEON/kernels/NEMinMaxLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEMinMaxLocationKernel.cpp | 4 +- src/core/NEON/kernels/NENonLinearFilterKernel.cpp | 4 +- .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 4 +- .../NEON/kernels/NENormalizationLayerKernel.cpp | 7 +- src/core/NEON/kernels/NEPadLayerKernel.cpp | 2 + src/core/NEON/kernels/NEPermuteKernel.cpp | 4 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 4 +- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp | 2 + .../kernels/NEQLSTMLayerNormalizationKernel.cpp | 4 +- .../NEON/kernels/NEQuantizationLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEROIAlignLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp | 4 +- src/core/NEON/kernels/NERangeKernel.cpp | 2 + .../NEON/kernels/NEReductionOperationKernel.cpp | 6 +- src/core/NEON/kernels/NERemapKernel.cpp | 9 +- src/core/NEON/kernels/NEReorgLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEReverseKernel.cpp | 2 + src/core/NEON/kernels/NEScaleKernel.cpp | 12 +- 
src/core/NEON/kernels/NEScharr3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NESelectKernel.cpp | 4 +- src/core/NEON/kernels/NESobel3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NESobel5x5Kernel.cpp | 4 +- src/core/NEON/kernels/NESobel7x7Kernel.cpp | 4 +- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 8 +- .../NEON/kernels/NESpaceToBatchLayerKernel.cpp | 3 + .../NEON/kernels/NESpaceToDepthLayerKernel.cpp | 3 + src/core/NEON/kernels/NEStackLayerKernel.cpp | 2 + src/core/NEON/kernels/NEStridedSliceKernel.cpp | 9 +- src/core/NEON/kernels/NEThresholdKernel.cpp | 2 + src/core/NEON/kernels/NETileKernel.cpp | 4 +- src/core/NEON/kernels/NETransposeKernel.cpp | 6 +- src/core/NEON/kernels/NEUpsampleLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEWarpKernel.cpp | 19 +- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 2 + .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 2 + .../kernels/NEWinogradConvolutionLayerKernel.cpp | 6 +- .../kernels/NEWinogradConvolutionLayerKernel.h | 4 +- src/core/NEON/kernels/NEYOLOLayerKernel.cpp | 4 +- .../NEON/kernels/assembly/INEGEMMWrapperKernel.cpp | 4 +- .../NEON/kernels/assembly/INEGEMMWrapperKernel.h | 108 +++ .../NEDepthwiseConvolutionAssemblyKernelWrapper.h | 88 +++ src/core/NEON/kernels/assembly/arm_gemm_local.hpp | 34 + .../NEON/kernels/convolution/common/activation.hpp | 37 + src/core/NEON/kernels/convolution/common/alloc.hpp | 31 + src/core/NEON/kernels/convolution/common/arm.hpp | 39 ++ .../kernels/convolution/common/convolution.hpp | 29 + .../NEON/kernels/convolution/common/padding.hpp | 91 +++ src/core/NEON/kernels/convolution/common/perf.h | 32 + .../NEON/kernels/convolution/common/qasymm8.hpp | 54 ++ .../NEON/kernels/convolution/common/qsymm8.hpp | 76 +++ src/core/NEON/kernels/convolution/common/shims.hpp | 749 +++++++++++++++++++++ .../NEON/kernels/convolution/common/tensor.hpp | 178 +++++ .../kernels/convolution/common/tensor_utils.hpp | 46 ++ src/core/NEON/kernels/convolution/common/utils.hpp | 60 ++ 
.../kernels/convolution/depthwise/depthwise.hpp | 551 +++++++++++++++ .../convolution/depthwise/depthwise_dilated.hpp | 156 +++++ .../convolution/depthwise/depthwise_quantized.hpp | 291 ++++++++ .../depthwise/depthwise_quantized_dilated.hpp | 88 +++ .../kernels/detail/NEDirectConvolutionDetail.h | 4 +- 144 files changed, 3136 insertions(+), 149 deletions(-) create mode 100644 src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h create mode 100644 src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h create mode 100644 src/core/NEON/kernels/assembly/arm_gemm_local.hpp create mode 100644 src/core/NEON/kernels/convolution/common/activation.hpp create mode 100644 src/core/NEON/kernels/convolution/common/alloc.hpp create mode 100644 src/core/NEON/kernels/convolution/common/arm.hpp create mode 100644 src/core/NEON/kernels/convolution/common/convolution.hpp create mode 100644 src/core/NEON/kernels/convolution/common/padding.hpp create mode 100644 src/core/NEON/kernels/convolution/common/perf.h create mode 100644 src/core/NEON/kernels/convolution/common/qasymm8.hpp create mode 100644 src/core/NEON/kernels/convolution/common/qsymm8.hpp create mode 100644 src/core/NEON/kernels/convolution/common/shims.hpp create mode 100644 src/core/NEON/kernels/convolution/common/tensor.hpp create mode 100644 src/core/NEON/kernels/convolution/common/tensor_utils.hpp create mode 100644 src/core/NEON/kernels/convolution/common/utils.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp (limited to 'src/core/NEON') diff --git a/src/core/NEON/NETracePoint.cpp b/src/core/NEON/NETracePoint.cpp index cb0dc1400a..4a6bffa54e 100644 --- a/src/core/NEON/NETracePoint.cpp +++ 
b/src/core/NEON/NETracePoint.cpp @@ -24,8 +24,8 @@ #include "arm_compute/core/TracePoint.h" #include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" -#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" +#include "src/core/NEON/kernels/convolution/common/convolution.hpp" #include "utils/TypePrinter.h" #include diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp index 3d4800fe15..acea0af02d 100644 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp index 7c85f698ae..73ef7eb66f 100644 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index d80aab7069..9616f4faca 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -23,15 +23,17 @@ */ #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 525e2866f2..7f1a35fb55 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp 
b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index a3da7508ab..49e503fac4 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index c7169d8932..65ac996f46 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Window.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index 50e46474b5..bda396662f 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" +#include 
"src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp index eb28ce0a8b..e24d7b6c0a 100644 --- a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp index caaa6c22e8..2d49ff825e 100644 --- a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp index 4da07f93b0..eed9b273ae 100644 --- a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp index 591acf50e1..f96117e860 100644 --- a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp index b0aec4078f..45d2b0a0db 100644 --- a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index 56444dcbc0..5a18e88321 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp index d5d03a9def..1177f6f1dd 100644 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,9 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include using namespace arm_compute; diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index 0278bb08e1..da33c1b1ea 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,6 +30,11 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.cpp b/src/core/NEON/kernels/NEChannelCombineKernel.cpp index 0de6c4326a..7bd380831b 100644 --- a/src/core/NEON/kernels/NEChannelCombineKernel.cpp +++ b/src/core/NEON/kernels/NEChannelCombineKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -33,6 +33,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.cpp b/src/core/NEON/kernels/NEChannelExtractKernel.cpp index 800c63606f..86245acd05 100644 --- a/src/core/NEON/kernels/NEChannelExtractKernel.cpp +++ b/src/core/NEON/kernels/NEChannelExtractKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp index 88cd0ae514..6d04d71534 100644 --- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index 6a07defd79..f3192370a6 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp index bc8c77543a..f933a2a898 100644 --- a/src/core/NEON/kernels/NEColorConvertKernel.cpp +++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEColorConvertHelper.inl" diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp index 97bb8ccb8a..8716cfd9b5 100644 --- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp +++ 
b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp index f40f1215d3..bd8ea30fb3 100644 --- a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp +++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEConvolutionKernel.cpp b/src/core/NEON/kernels/NEConvolutionKernel.cpp index 7103fa1618..69b65b2816 100644 --- a/src/core/NEON/kernels/NEConvolutionKernel.cpp +++ b/src/core/NEON/kernels/NEConvolutionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NECopyKernel.cpp b/src/core/NEON/kernels/NECopyKernel.cpp index 3d00139263..b299957b57 100644 --- a/src/core/NEON/kernels/NECopyKernel.cpp +++ b/src/core/NEON/kernels/NECopyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp index 7c65e71727..5fb55d95a9 100644 --- a/src/core/NEON/kernels/NECropKernel.cpp +++ b/src/core/NEON/kernels/NECropKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NECropKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" - #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/helpers/bit_ops.h" +#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/bit_ops.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp index cec0e1ce60..5628802783 100644 --- a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp +++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 6066326fec..b500268477 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -34,6 +34,8 @@ #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index ee23909bd6..259ece7c6f 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -23,17 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/SaturateCast.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" - #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/SaturateCast.h" using namespace arm_compute; diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp index 6465848999..403e7aac9f 100644 --- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp +++ 
b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp @@ -29,6 +29,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include #include diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index 0a34ee6a07..533b374594 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp" #include "src/core/NEON/wrapper/traits.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" namespace arm_compute @@ -48,7 +50,6 @@ constexpr size_t vector_size = 8; struct DepthwiseConvolutionRunInfo { -public: const size_t num_read_elements_per_iteration; const uint32_t x_start; const uint32_t x_end; diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 9352088b1f..2f3c6f431c 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include 
"arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDerivativeKernel.cpp b/src/core/NEON/kernels/NEDerivativeKernel.cpp index ad590e9f2b..5d3fc01bd2 100644 --- a/src/core/NEON/kernels/NEDerivativeKernel.cpp +++ b/src/core/NEON/kernels/NEDerivativeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp index c30dab22c6..cc781c699f 100644 --- a/src/core/NEON/kernels/NEDilateKernel.cpp +++ b/src/core/NEON/kernels/NEDilateKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index c22fa6a2b3..56cd6e62d0 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -26,8 +26,6 @@ #include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h" #include 
"src/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -36,7 +34,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp index 8c11574755..abaaf12e92 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,9 +30,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index f862d04b22..efe6161096 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp 
@@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp index 40430bdb81..8e4b7eda30 100644 --- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" namespace arm_compute diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp index 4b93c3b4d1..31b0f487d6 100644 --- a/src/core/NEON/kernels/NEErodeKernel.cpp +++ b/src/core/NEON/kernels/NEErodeKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp index d5b20d278d..d8036f2f60 100644 --- a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp +++ 
b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp index de8ba3f484..1b0af488a2 100644 --- a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp +++ b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp @@ -28,15 +28,16 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/wrapper/traits.h" +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include #include #include -#include "src/core/NEON/wrapper/traits.h" -#include "src/core/NEON/wrapper/wrapper.h" - namespace arm_compute { namespace diff --git a/src/core/NEON/kernels/NEFFTScaleKernel.cpp b/src/core/NEON/kernels/NEFFTScaleKernel.cpp index d99ff953fc..0cb8b84db8 100644 --- a/src/core/NEON/kernels/NEFFTScaleKernel.cpp +++ b/src/core/NEON/kernels/NEFFTScaleKernel.cpp @@ -29,6 +29,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp index 7b1d81e12c..99312f5134 100644 --- a/src/core/NEON/kernels/NEFastCornersKernel.cpp +++ b/src/core/NEON/kernels/NEFastCornersKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEFillArrayKernel.cpp b/src/core/NEON/kernels/NEFillArrayKernel.cpp index 6b22dadd08..93798db6c3 100644 --- a/src/core/NEON/kernels/NEFillArrayKernel.cpp +++ b/src/core/NEON/kernels/NEFillArrayKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index dbaec83d04..c1dd5cf81f 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp index 35ebc5b70b..e6b34b6165 100644 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" diff --git a/src/core/NEON/kernels/NEFloorKernel.cpp b/src/core/NEON/kernels/NEFloorKernel.cpp index 301dc7a422..48f964c6a2 100644 --- a/src/core/NEON/kernels/NEFloorKernel.cpp +++ b/src/core/NEON/kernels/NEFloorKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEFloorKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/floor/impl/list.h" #include "src/core/common/Registrars.h" diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index 00d251f79e..e353df1c39 100644 --- 
a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index 8b4ad0da23..2997c1d003 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp index f3ba2901cb..acc519012b 100644 --- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp @@ -31,6 +31,10 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include using namespace arm_compute; @@ -1052,5 +1056,3 @@ void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window, const ThreadInfo } } } // namespace arm_compute - - diff --git 
a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp index 4ac33d1e29..1c76926546 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,6 +31,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp index 8d0d7c26a3..6a7d225167 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,8 +31,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 023b798b9a..659c4105c1 100644 
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,7 +31,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index 68f16c5fc7..afa8cec76f 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NESymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index 2ef32c4e81..83416e03e9 
100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEAsymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index 8fc33dcc82..1e8aa0cc0a 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEAsymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp index 1494cd459c..566872f02c 100644 --- 
a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorInfo.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index bd931469a3..9aee26ca55 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index 6f74e3fc06..a9236890e3 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,9 +31,13 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include 
"arm_compute/core/utils/helpers/float_ops.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/float_ops.h" #include diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index a8adc45645..b9b4fe9e9c 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index 906e8a053e..193fe98c7b 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEGatherKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -32,6 +31,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp index 18dd80e283..5ff5db7266 100644 --- a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp index 99b5d4b093..5bb3e76ded 100644 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp index 83d2877836..62cf414df2 100644 --- a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp +++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp index c3b105919b..483f204b04 100644 --- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp index 84bb59ef0e..00f4087cbc 100644 --- a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp index eb0d45000a..d5dfa4195d 100644 --- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index 340c694a7c..be68b9c44b 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,8 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index fc7b819f6a..a50712598a 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Window.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp index 0f8397f117..12d1bb8e7e 100644 --- a/src/core/NEON/kernels/NEHistogramKernel.cpp +++ b/src/core/NEON/kernels/NEHistogramKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 6eae0541aa..915ea75431 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp index 78acbc399d..7aa23de6eb 100644 --- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,8 +31,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git 
a/src/core/NEON/kernels/NEIntegralImageKernel.cpp b/src/core/NEON/kernels/NEIntegralImageKernel.cpp index 58ee3b4bea..5fc6ca65e3 100644 --- a/src/core/NEON/kernels/NEIntegralImageKernel.cpp +++ b/src/core/NEON/kernels/NEIntegralImageKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp index d99def53ba..a216981f0f 100644 --- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/wrapper/wrapper.h" #include diff --git a/src/core/NEON/kernels/NELKTrackerKernel.cpp b/src/core/NEON/kernels/NELKTrackerKernel.cpp index 533c241b9b..6567a8d206 100644 --- a/src/core/NEON/kernels/NELKTrackerKernel.cpp +++ b/src/core/NEON/kernels/NELKTrackerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -31,6 +30,9 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index 9eafe18020..b8e6a6d763 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -33,7 +32,10 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index a0c1dbc668..8d82e1abd6 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp index 821bf53817..87caf00477 100644 --- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index 914a21c0a0..c4e036a8b9 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index bcce843638..8ee9ff6f40 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp index 72225a4f43..86fcc30e91 100644 --- a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMemsetKernel.cpp b/src/core/NEON/kernels/NEMemsetKernel.cpp index 3870fa57f0..fd427cc8c5 100644 --- a/src/core/NEON/kernels/NEMemsetKernel.cpp +++ b/src/core/NEON/kernels/NEMemsetKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,12 +23,14 @@ */ #include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp index b1c2b1c376..f675c391ed 100644 --- a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -33,6 +33,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp index e956f9a8d0..e1691dc8ff 100644 --- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,6 +33,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp index f20e869272..31919ead03 100644 --- a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp +++ b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp index 3e4c6e29d3..9566ced768 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index 7b888266fb..1b72a3e277 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -23,17 +23,20 @@ */ #include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/NormalizationHelpers.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp index 1b52117bbe..ca9c5419e0 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp index 3f447f90b9..eab11ebfff 100644 --- 
a/src/core/NEON/kernels/NEPermuteKernel.cpp +++ b/src/core/NEON/kernels/NEPermuteKernel.cpp @@ -30,10 +30,12 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace { -#include "arm_compute/core/NEON/kernels/convolution/common/shims.hpp" +#include "src/core/NEON/kernels/convolution/common/shims.hpp" } // namespace namespace arm_compute diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index c5320b9dbf..0847cb1f23 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 397eae94ea..f9636dcb8d 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -33,9 +31,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include 
"src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp index 808b68a0d7..06a1f14e5f 100644 --- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp index 6a038f8f44..55585b4e00 100644 --- a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/NESymm.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h" diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 6d5202d6b5..990e4b67bc 100644 --- 
a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -31,8 +31,10 @@ #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" -#include "arm_compute/core/CPP/Validate.h" +#include "src/core/CPP/Validate.h" #include #include diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp index 955cdc2074..79f7888eba 100644 --- a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp index 6a960c74dc..a3171d9aa6 100644 --- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp @@ -23,10 +23,12 @@ */ #include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" 
#include diff --git a/src/core/NEON/kernels/NERangeKernel.cpp b/src/core/NEON/kernels/NERangeKernel.cpp index 7d8fbb1ec1..3466794b11 100644 --- a/src/core/NEON/kernels/NERangeKernel.cpp +++ b/src/core/NEON/kernels/NERangeKernel.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "arm_compute/core/Utils.h" diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index 9af7f2ab10..716b092396 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -32,9 +31,12 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/SaturateCast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/SaturateCast.h" #include "src/core/NEON/wrapper/wrapper.h" #include diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp index 2881161d7f..f698439507 100644 --- a/src/core/NEON/kernels/NERemapKernel.cpp +++ b/src/core/NEON/kernels/NERemapKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,13 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NERemapKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/ScaleHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include #include @@ -175,6 +178,8 @@ void NERemapKernel::remap_nearest(const Window &window) void NERemapKernel::remap_bilinear(const Window &window) { + using namespace scale_helpers; + // Don't increment in X and Y direction for the input tensor // A pointer to the start of this plane is needed as base for the precomputed offsets Window win_in(window); diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index 317bc25967..1c48a5c93d 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 23b349b443..7946812811 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -33,6 +31,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 0c44a7e0c9..2c081cb917 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 94f5a18102..e07fcad0ab 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -23,15 +23,17 @@ */ #include "arm_compute/core/NEON/kernels/NEScaleKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" 
-#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Rounding.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" - +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/ScaleHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/utils/ScaleUtils.h" +#include "support/Rounding.h" #include #include @@ -336,6 +338,8 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) void NEScaleKernel::scale_area_nchw_u8(const Window &window) { + using namespace scale_helpers; + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8); // Don't increment in width/height/channels for the input tensor diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp index dcc9362cf0..eb1dc65c0f 100644 --- a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp index 286b8a63c8..2f36db2ddb 100644 --- a/src/core/NEON/kernels/NESelectKernel.cpp +++ b/src/core/NEON/kernels/NESelectKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NESelectKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" @@ -31,7 +30,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "utils/TypePrinter.h" #include diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.cpp b/src/core/NEON/kernels/NESobel3x3Kernel.cpp index eb9d3c3020..1c7089b641 100644 --- a/src/core/NEON/kernels/NESobel3x3Kernel.cpp +++ b/src/core/NEON/kernels/NESobel3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.cpp b/src/core/NEON/kernels/NESobel5x5Kernel.cpp index fc8ccc803d..2421ea72ad 100644 --- a/src/core/NEON/kernels/NESobel5x5Kernel.cpp +++ b/src/core/NEON/kernels/NESobel5x5Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.cpp b/src/core/NEON/kernels/NESobel7x7Kernel.cpp index 95ab12b6cd..779d67a044 100644 --- a/src/core/NEON/kernels/NESobel7x7Kernel.cpp +++ b/src/core/NEON/kernels/NESobel7x7Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include #include diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index e71818f213..13f0a54275 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -23,8 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,10 +30,14 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/SaturateCast.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/SaturateCast.h" #include #include diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp index ccad92a685..3293466979 100644 --- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp @@ -29,6 +29,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include #include diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp 
b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp index 2667611d2c..7c9cc4996b 100644 --- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp @@ -29,6 +29,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + #include #include diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index 1d44be60a0..ad7f1b1300 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 243a60f249..13b2cb5a10 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -23,16 +23,17 @@ */ #include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" - #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/helpers/bit_ops.h" +#include "arm_compute/core/Window.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include 
"src/core/utils/helpers/bit_ops.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp index 9e8ec5c106..aad440b120 100644 --- a/src/core/NEON/kernels/NEThresholdKernel.cpp +++ b/src/core/NEON/kernels/NEThresholdKernel.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NETileKernel.cpp b/src/core/NEON/kernels/NETileKernel.cpp index cc7655a479..99651c8b8a 100644 --- a/src/core/NEON/kernels/NETileKernel.cpp +++ b/src/core/NEON/kernels/NETileKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 7118e45f1e..6037810a44 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -23,14 +23,16 @@ */ #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/AccessWindowTranspose.h" +#include "src/core/helpers/AutoConfiguration.h" +#include 
"src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp index 69324c1693..129c83c695 100644 --- a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -31,7 +30,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp index d8191dce53..891304f02c 100644 --- a/src/core/NEON/kernels/NEWarpKernel.cpp +++ b/src/core/NEON/kernels/NEWarpKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEWarpKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -31,6 +30,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/ScaleHelpers.h" +#include "src/core/helpers/WindowHelpers.h" #include @@ -184,7 +187,7 @@ void NEWarpAffineKernel::warp_undefined(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -271,7 +274,7 @@ void NEWarpAffineKernel::warp_constant(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -386,7 +389,7 @@ void NEWarpAffineKernel::warp_replicate(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -519,7 +522,7 @@ void NEWarpPerspectiveKernel::warp_undefined(const Window &window *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), 
stride, xn, yn); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -620,7 +623,7 @@ void NEWarpPerspectiveKernel::warp_constant(const Window &window) *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); @@ -752,7 +755,7 @@ void NEWarpPerspectiveKernel::warp_replicate(const Window &window *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); break; case InterpolationPolicy::BILINEAR: - *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn); + *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); break; default: ARM_COMPUTE_ERROR("Interpolation not supported"); diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index 6a74914ff7..c7fa2d2365 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -25,6 +25,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index d12b10c69e..90afbd6a19 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -33,6 +33,8 @@ #include "arm_compute/core/Window.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include diff --git 
a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp index bfe97bfbdb..211ebdec90 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp @@ -23,16 +23,18 @@ */ #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/NEON/kernels/convolution/common/utils.hpp" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "support/MemorySupport.h" #include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp" diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index 94df4f6952..bf5d77fc43 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -25,8 +25,8 @@ #define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H #include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp" -#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp" +#include "src/core/NEON/kernels/convolution/common/convolution.hpp" +#include "src/core/NEON/kernels/convolution/common/tensor.hpp" #include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp" diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp 
b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 591aa1e5e6..48c0616b35 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h" diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp index b071be3749..760274dba1 100644 --- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp +++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h new file mode 100644 index 0000000000..030f1aad12 --- /dev/null +++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_INEGEMMWRAPPERKERNEL_H +#define SRC_INEGEMMWRAPPERKERNEL_H + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Common interface for all the arm_gemm Gemms + */ +class INEGEMMWrapperKernel : public INEKernel +{ +public: + /** Parameters defining the dimensions of the matrices being multiplied */ + struct Params + { + unsigned int M{ 0 }; /**< Rows in output matrix C (and input matrix A). */ + unsigned int N{ 0 }; /**< Columns in output matrix C (and input matrix B). */ + unsigned int K{ 0 }; /**< Columns of input matrix A (= rows of input matrix B). */ + unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */ + unsigned int multis{ 0 }; /**< Number of "multi" GEMMs (unique A, B and C). 
*/ + }; + + static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info); + + /** Constructor */ + INEGEMMWrapperKernel(); + /** Prevent instances of this class from being copied */ + INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete; + /** Prevent instances of this class from being copied */ + INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete; + /** Allow instances of this class to be moved */ + INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default; + /** Allow instances of this class to be moved */ + INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p a. + * @param[in] alpha Scalar multiplier to apply to AB matrix product. + * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. + * @param[in] gemm_info GEMM meta-data + */ + void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +protected: + /** Called as part of configure() after _a, _b, _c and _params have been set. + * + * @param[in] alpha Scalar multiplier to apply to AB matrix product. + * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. + * + * @return A 3D execution window. + */ + virtual Window configure_internal(float alpha, float beta) = 0; + + /** Run the kernel from the start to the end offset in window. 
+ * + * @param[in] window Window to use for the iteration + * @param[in] start_offset Where to start iterating from (In Window coordinates) + * @param[in] end_offset Where to stop iterating (In Window coordinates). + * @param[in] info Info about executing thread and CPU. + */ + virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0; + + const ITensor *_a; + const ITensor *_b; + ITensor *_c; + Params _params; + GEMMInfo _gemm_info; + +private: + Window _window3d; + TensorShape _window_shape; +}; + +} // namespace arm_compute + +#endif /* SRC_INEGEMMWRAPPERKERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h new file mode 100644 index 0000000000..a2f7e3bd59 --- /dev/null +++ b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H +#define SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" + +#include "src/core/NEON/kernels/convolution/depthwise/depthwise.hpp" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** This class is a wrapper for the depthwise convolution assembly kernels. */ +class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthwiseConvolutionAssemblyKernelWrapper"; + } + + /** Default constructor */ + NEDepthwiseConvolutionAssemblyKernelWrapper() + : _kernel(nullptr) + { + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; + /** Default Move Constructor. */ + NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; + /** Default move assignment operator */ + NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; + + /** Initialise the kernel's input and output. + * + * @param[in] kernel Pointer to an assembly kernel implementation. 
+ */ + void configure(depthwise::IDepthwiseConvolution *kernel) + { + ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast(kernel))); + _kernel = kernel; + Window win; + win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1)); + INEKernel::configure(win); + } + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override + { + ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast(_kernel))); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + auto first = window.x().start(); + auto last = window.x().end(); + _kernel->run(first, last, info.thread_id); + } + +private: + depthwise::IDepthwiseConvolution *_kernel; +}; +} // namespace arm_compute +#endif /* SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/arm_gemm_local.hpp b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp new file mode 100644 index 0000000000..4715f2500a --- /dev/null +++ b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +/* This file is used to configure integration-specific aspects of arm_gemm into ACL */ + +#include "arm_compute/core/CPP/CPPTypes.h" + +namespace arm_gemm +{ +using CPUModel = arm_compute::CPUModel; +using CPUInfo = arm_compute::CPUInfo; +} // namespace arm_gemm diff --git a/src/core/NEON/kernels/convolution/common/activation.hpp b/src/core/NEON/kernels/convolution/common/activation.hpp new file mode 100644 index 0000000000..0c9b7c1368 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/activation.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +namespace neon_convolution_kernels +{ + +enum class ActivationFunction +{ + None, + ReLU, + ReLU6, +}; + +} diff --git a/src/core/NEON/kernels/convolution/common/alloc.hpp b/src/core/NEON/kernels/convolution/common/alloc.hpp new file mode 100644 index 0000000000..7be3cdaaf5 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/alloc.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2017 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once + +#ifdef ALLOC_ALIGN +#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x) +#else +#define ALLOCATE(x) malloc(x) +#endif diff --git a/src/core/NEON/kernels/convolution/common/arm.hpp b/src/core/NEON/kernels/convolution/common/arm.hpp new file mode 100644 index 0000000000..b19bf98252 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/arm.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2017 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** Sets the macro __arm_any__ if compiling for Aarch32 or Aarch64. + * Includes `arm_neon.h` if compiling for either architecture. 
+ */ + +#ifdef __arm__ +#define __arm_any__ +#endif // __arm__ + +#ifdef __aarch64__ +#define __arm_any__ +#endif // __aarch64__ + +#ifdef __arm_any__ +#include +#endif // __arm_any__ diff --git a/src/core/NEON/kernels/convolution/common/convolution.hpp b/src/core/NEON/kernels/convolution/common/convolution.hpp new file mode 100644 index 0000000000..b1413527c3 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/convolution.hpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once + +enum PaddingType { + PADDING_SAME, PADDING_VALID +}; diff --git a/src/core/NEON/kernels/convolution/common/padding.hpp b/src/core/NEON/kernels/convolution/common/padding.hpp new file mode 100644 index 0000000000..b6f95872c0 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/padding.hpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include + +// Utilities for copying tensor tiles and adding/removing padding. +namespace padding +{ + +/* Copy a tile and apply padding to the output copy. 
+ */ +template +void copy_and_pad_tile( + unsigned int tile_rows, + unsigned int tile_cols, + unsigned int n_channels, + const T *inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + T* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride, + unsigned int pad_top, + unsigned int pad_left, + unsigned int pad_bottom, + unsigned int pad_right, + T pad_value=static_cast(0) +); + +/** Copy a tile and remove padding elements in the output. + */ +template +class CopyCropped +{ + public: + static void execute( + size_t size, // Amount of data to copy + const void *inptr, + size_t in_row_stride, + size_t in_col_stride, + void *outptr, + size_t out_row_stride, + size_t out_col_stride, + unsigned int pad_top, + unsigned int pad_left, + unsigned int pad_bottom, + unsigned int pad_right + ); +}; + +template +void crop_and_copy_tile( + unsigned int tile_rows, + unsigned int tile_cols, + unsigned int n_channels, + const T *inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + T *outptr, + unsigned int out_row_stride, + unsigned int out_col_stride, + unsigned int crop_top, + unsigned int crop_left, + unsigned int crop_bottom, + unsigned int crop_right +); + +} diff --git a/src/core/NEON/kernels/convolution/common/perf.h b/src/core/NEON/kernels/convolution/common/perf.h new file mode 100644 index 0000000000..fbae4dcdfa --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/perf.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2018 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +/* Prototypes from perf.c */ + +void start_counter(int fd); +long long get_counter(int fd); +long long stop_counter(int fd); +int open_instruction_counter(void); +int open_cycle_counter(void); diff --git a/src/core/NEON/kernels/convolution/common/qasymm8.hpp b/src/core/NEON/kernels/convolution/common/qasymm8.hpp new file mode 100644 index 0000000000..88ef7327c0 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/qasymm8.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#include + +namespace qasymm8 +{ + +struct QAsymm8Params +{ + uint8_t quantize(float value) const; + float dequantize(uint8_t value) const; + + uint8_t offset; + float scale; +}; + +struct QAsymm8RescaleParams +{ + static QAsymm8RescaleParams make_rescale_params( + const QAsymm8Params& weight_quant, + const QAsymm8Params& input_quant, + const QAsymm8Params& output_quant + ); + + QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); + + const int32_t shift, multiplier; + const float rescale; +}; + +} diff --git a/src/core/NEON/kernels/convolution/common/qsymm8.hpp b/src/core/NEON/kernels/convolution/common/qsymm8.hpp new file mode 100644 index 0000000000..726a02ccfd --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/qsymm8.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once +#include +#include +#include "qasymm8.hpp" + + +namespace qsymm8 { + +struct QSymm8Params { + int8_t quantize(float value) const; + float dequantize(int8_t value) const; + + float scale; +}; + +struct QSymm8RescaleParams { + static QSymm8RescaleParams + make_rescale_params(const QSymm8Params &weight_quant, + const QSymm8Params &input_quant, + const QSymm8Params &output_quant); + + QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); + + const int32_t shift, multiplier; + const float rescale; +}; + +struct QSymm8PerChannelParams { + int8_t quantize(float value, float scale) const; + float dequantize(int8_t value, float scale) const; + + std::vector scales; +}; + +struct QSymm8PerChannelRescaleParams { + static QSymm8PerChannelRescaleParams + make_rescale_params(const QSymm8PerChannelParams &weight_quant, + const QSymm8PerChannelParams &input_quant, + const QSymm8PerChannelParams &output_quant); + + static QSymm8PerChannelRescaleParams + make_rescale_params(const QSymm8PerChannelParams &weight_quant, + const qasymm8::QAsymm8Params &input_quant, + const qasymm8::QAsymm8Params &output_quant); + + QSymm8PerChannelRescaleParams(std::vector& shift, std::vector& multiplier, std::vector& rescale); + + std::vector shifts, multipliers; + std::vector rescales; +}; + +} // namespace qsymm8 diff --git a/src/core/NEON/kernels/convolution/common/shims.hpp b/src/core/NEON/kernels/convolution/common/shims.hpp new file mode 100644 index 0000000000..310bd47b82 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/shims.hpp @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2017 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#ifndef DOXYGEN_SKIP_THIS +#include +#endif /* DOXYGEN_SKIP_THIS */ +#include "arm.hpp" + +namespace reorder { +/** Re-order a tensor from NCHW format to NHWC. + * + * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. + * + * @param[in] in Input tensor in NCHW format. + * @param[out] out Output tensor, to be written in NHWC format. + * @param n_batches Number of batches in the tensors. + * @param n_channels Number of channels in the tensors + * @param n_rows Height of the tensor + * @param n_cols Width of the tensor + * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`. + * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`. 
+ * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`. + * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`. + * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`. + * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`. + */ +template +inline void nchw_to_nhwc( + const T* const in, + T* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride=0, + int in_channel_stride=0, + int in_row_stride=0, + int out_batch_stride=0, + int out_row_stride=0, + int out_col_stride=0 +); + +/** Re-order a tensor from NHWC format to NCHW. + * + * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. + * + * @param[in] in Input tensor in NHWC format. + * @param[out] out Output tensor, to be written in NCHW format. + * @param n_batches Number of batches in the tensors. + * @param n_rows Height of the tensor + * @param n_cols Width of the tensor + * @param n_channels Number of channels in the tensors + * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`. + * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`. + * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`. + * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`. + * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`. + * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`. 
+ */ +template +inline void nhwc_to_nchw( + const T* const in, // Input data in NHWC form + T* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride=0, + int in_row_stride=0, + int in_col_stride=0, + int out_batch_stride=0, + int out_channel_stride=0, + int out_row_stride=0 +); + +/** Re-order a weight tensor from [Output feature map x Input feature map x + * Height x Width] format to [Height x Width x Input feature map x Output + * feature map] format. + */ +template +inline void ofm_ifm_h_w_to_h_w_ifm_ofm( + const T* const in, // Input in [Output x Input x Height x Width] form + T* const out, // Output in [Height x Width x Input x Output] form + const int n_output_feature_maps, + const int n_input_feature_maps, + const int n_rows, + const int n_cols, + int in_output_feature_map_stride=0, + int in_input_feature_map_stride=0, + int in_row_stride=0, + int out_row_stride=0, + int out_col_stride=0, + int out_input_feature_map_stride=0 +); + +/** Re-order a weight tensor from [Height x Width x Input feature map x Output + * feature map] format to [Output feature map x Input feature map x Height x + * Width] format. 
+ */ +template +inline void h_w_ifm_ofm_to_ofm_ifm_h_w( + const T* const in, // Input in [Height x Width x Input x Output] form + T* const out, // Output in [Output x Input x Height x Width] form + const int n_rows, + const int n_cols, + const int n_input_feature_maps, + const int n_output_feature_maps, + int in_row_stride=0, + int in_col_stride=0, + int in_input_feature_map_stride=0, + int out_output_feature_map_stride=0, + int out_input_feature_map_stride=0, + int out_row_stride=0 +); + +/*****************************************************************************/ +/* 32-bit implementation : NCHW -> NHWC + */ +template <> +inline void nchw_to_nhwc( + const int32_t* const in, + int32_t* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + typedef int32_t T; + + // Fill in the stride values + in_row_stride = (in_row_stride) ? in_row_stride : n_cols; + in_channel_stride = (in_channel_stride) ? in_channel_stride + : n_rows * in_row_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_channels * in_channel_stride; + + out_col_stride = (out_col_stride) ? out_col_stride : n_channels; + out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_rows * out_row_stride; + + // Perform the re-ordering + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in_batch + i*in_row_stride; + T* const out_row = out_batch + i*out_row_stride; + + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 4; j += 4, j_remaining -= 4) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 4; c += 4, c_remaining -= 4) + { + // Read 4 channels worth of 4 columns, then zip to produce 4 columns + // worth of 4 channels. + int32x4_t channel_pixels[4]; + channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j); + channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j); + channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j); + channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j); + + const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]); + const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]); + const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); + const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); + + vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]); + vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]); + vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]); + vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 4; _j++) + { + const T* const in_col = in_row + j + _j; + T* const out_col = out_row + (j + _j)*out_col_stride; + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + for (; j_remaining >= 2; j += 2, j_remaining -= 2) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 2; c += 2, c_remaining -= 2) + { + // Read 2 channels worth of 2 columns, 
then zip to produce 2 columns + // worth of 2 channels. + int32x2_t channel_pixels[2]; + channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j); + channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j); + + const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]); + + vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]); + vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 2; _j++) + { + const T* const in_col = in_row + j + _j; + T* const out_col = out_row + (j + _j)*out_col_stride; + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T* const in_col = in_row + j; + T* const out_col = out_row + j*out_col_stride; + + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + } +} + +template <> +inline void nchw_to_nhwc( + const uint32_t* const in, + uint32_t* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + nchw_to_nhwc( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_channels, n_rows, n_cols, + in_batch_stride, in_channel_stride, in_row_stride, + out_batch_stride, out_row_stride, out_col_stride + ); +} + +template <> +inline void nchw_to_nhwc( + const float* const in, + float* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + nchw_to_nhwc( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_channels, n_rows, n_cols, + 
in_batch_stride, in_channel_stride, in_row_stride, + out_batch_stride, out_row_stride, out_col_stride + ); +} + +/*****************************************************************************/ +/* Generic implementation : NCHW -> NHWC + */ +template +inline void nchw_to_nhwc( + const T* const in, + T* const out, + const int n_batches, + const int n_channels, + const int n_rows, + const int n_cols, + int in_batch_stride, + int in_channel_stride, + int in_row_stride, + int out_batch_stride, + int out_row_stride, + int out_col_stride +) +{ + // Fill in the stride values + in_row_stride = (in_row_stride) ? in_row_stride : n_cols; + in_channel_stride = (in_channel_stride) ? in_channel_stride + : n_rows * in_row_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_channels * in_channel_stride; + + out_col_stride = (out_col_stride) ? out_col_stride : n_channels; + out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_rows * out_row_stride; + + // Perform the re-ordering + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in_batch + i*in_row_stride; + T* const out_row = out_batch + i*out_row_stride; + + for (int j = 0; j < n_cols; j++) + { + const T* const in_col = in_row + j; + T* const out_col = out_row + j*out_col_stride; + + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_col + c*in_channel_stride; + out_col[c] = *(in_channel); + } + } + } + } +} + +/*****************************************************************************/ +/* 32-bit implementation : NHWC -> NCHW + */ +template <> +inline void nhwc_to_nchw( + const int32_t* const in, // Input data in NHWC form + int32_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + typedef int32_t T; + + // Fill in stride values + in_col_stride = (in_col_stride) ? in_col_stride : n_channels; + in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_rows * in_row_stride; + + out_row_stride = (out_row_stride) ? out_row_stride : n_cols; + out_channel_stride = (out_channel_stride) ? out_channel_stride + : n_rows * out_row_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_channels * out_channel_stride; + + // Perform the re-ordering + // For every batch + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + // For every row + for (int i = 0; i < n_rows; i++) + { + const T* const in_i = in_batch + i*in_row_stride; + T* const out_i = out_batch + i*out_row_stride; + + // For every column, beginning with chunks of 4 + int j = 0, j_remaining = n_cols; +#ifdef __arm_any__ + for (; j_remaining >= 4; j += 4, j_remaining -=4) + { + // For every channel, beginning with chunks of 4 + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 4; c += 4, c_remaining -= 4) + { + // Read 4 columns worth of 4 channels then zip to produce 4 channels + // worth of 4 columns. + int32x4_t pixel_channels[4]; + pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c); + pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c); + pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c); + + const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]); + const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]); + const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); + const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); + + vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); + vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); + vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); + vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 4; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + for (; j_remaining >= 2; j += 2, 
j_remaining -=2) + { + int c = 0, c_remaining = n_channels; + for (; c_remaining >= 2; c += 2, c_remaining -= 2) + { + // Read 2 columns worth of 2 channels then zip to produce 2 channels + // worth of 2 columns. + int32x2_t pixel_channels[2]; + pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c); + pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c); + + const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]); + + vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]); + vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]); + } + for (; c_remaining; c++, c_remaining--) + { + for (int _j = 0; _j < 2; _j++) + { + const T* const in_j = in_i + (j + _j)*in_col_stride; + T* const out_j = out_i + (j + _j); + + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } +#endif // __arm_any__ + for (; j_remaining; j++, j_remaining--) + { + const T* const in_j = in_i + j*in_col_stride; + T* const out_j = out_i + j; + + // For every channel + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + } +} + +template <> +inline void nhwc_to_nchw( + const uint32_t* const in, // Input data in NHWC form + uint32_t* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + // Redirect to generic 32-bit implementation + nhwc_to_nchw( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_rows, n_cols, n_channels, + in_batch_stride, in_row_stride, in_col_stride, + out_batch_stride, out_channel_stride, out_row_stride + ); +} + +template <> +inline void nhwc_to_nchw( + const float* const in, // Input data in NHWC 
form + float* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + // Redirect to generic 32-bit implementation + nhwc_to_nchw( + reinterpret_cast(in), + reinterpret_cast(out), + n_batches, n_rows, n_cols, n_channels, + in_batch_stride, in_row_stride, in_col_stride, + out_batch_stride, out_channel_stride, out_row_stride + ); +} + +/*****************************************************************************/ +/* Generic implementation : NHWC -> NCHW + */ +template +inline void nhwc_to_nchw( + const T* const in, // Input data in NHWC form + T* const out, // Output data in NCHW form + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + int in_batch_stride, + int in_row_stride, + int in_col_stride, + int out_batch_stride, + int out_channel_stride, + int out_row_stride +) +{ + // Fill in stride values + in_col_stride = (in_col_stride) ? in_col_stride : n_channels; + in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; + in_batch_stride = (in_batch_stride) ? in_batch_stride + : n_rows * in_row_stride; + + out_row_stride = (out_row_stride) ? out_row_stride : n_cols; + out_channel_stride = (out_channel_stride) ? out_channel_stride + : n_rows * out_row_stride; + out_batch_stride = (out_batch_stride) ? 
out_batch_stride + : n_channels * out_channel_stride; + + // Perform the re-ordering + // For every batch + for (int n = 0; n < n_batches; n++) + { + const T* const in_batch = in + n*in_batch_stride; + T* const out_batch = out + n*out_batch_stride; + + // For every row + for (int i = 0; i < n_rows; i++) + { + const T* const in_i = in_batch + i*in_row_stride; + T* const out_i = out_batch + i*out_row_stride; + + // For every column + for (int j = 0; j < n_cols; j++) + { + const T* const in_j = in_i + j*in_col_stride; + T* const out_j = out_i + j; + + // For every channel + for (int c = 0; c < n_channels; c++) + { + const T* const in_channel = in_j + c; + T* const out_channel = out_j + c*out_channel_stride; + *(out_channel) = *(in_channel); + } + } + } + } +} + +/*****************************************************************************/ +/* Generic weight re-order implementation. + */ +template +inline void ofm_ifm_h_w_to_h_w_ifm_ofm( + const T* const in, // Input in [Output x Input x Height x Width] form + T* const out, // Output in [Height x Width x Input x Output] form + const int n_output_feature_maps, + const int n_input_feature_maps, + const int n_rows, + const int n_cols, + int in_output_feature_map_stride, + int in_input_feature_map_stride, + int in_row_stride, + int out_row_stride, + int out_col_stride, + int out_input_feature_map_stride +) +{ + // Fill in stride values + in_row_stride = (in_row_stride) + ? in_row_stride + : n_cols; + in_input_feature_map_stride = (in_input_feature_map_stride) + ? in_input_feature_map_stride + : n_rows * in_row_stride; + in_output_feature_map_stride = (in_output_feature_map_stride) + ? in_output_feature_map_stride + : n_input_feature_maps * in_input_feature_map_stride; + + out_input_feature_map_stride = (out_input_feature_map_stride) + ? out_input_feature_map_stride + : n_output_feature_maps; + out_col_stride = (out_col_stride) + ? 
out_col_stride + : n_input_feature_maps * out_input_feature_map_stride; + out_row_stride = (out_row_stride) + ? out_row_stride + : n_cols * out_col_stride; + + // Perform the re-ordering + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in + i * in_row_stride; + T* out_row = out + i * out_row_stride; + + for (int j = 0; j < n_cols; j++) + { + const T* const in_col = in_row + j; + T* const out_col = out_row + j * out_col_stride; + + for (int ifm = 0; ifm < n_input_feature_maps; ifm++) + { + const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; + T* const out_ifm = out_col + ifm * out_input_feature_map_stride; + + for (int ofm = 0; ofm < n_output_feature_maps; ofm++) + { + const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride; + T* const out_ofm = out_ifm + ofm; + *(out_ofm) = *(in_ofm); + } + } + } + } +} + +/*****************************************************************************/ +/* Generic weight re-order implementation. + */ +template +inline void h_w_ifm_ofm_to_ofm_ifm_h_w( + const T* const in, // Input in [Height x Width x Input x Output] form + T* const out, // Output in [Output x Input x Height x Width] form + const int n_rows, + const int n_cols, + const int n_input_feature_maps, + const int n_output_feature_maps, + int in_row_stride, + int in_col_stride, + int in_input_feature_map_stride, + int out_output_feature_map_stride, + int out_input_feature_map_stride, + int out_row_stride +) +{ + // Fill in the stride values + in_input_feature_map_stride = (in_input_feature_map_stride) + ? in_input_feature_map_stride + : n_output_feature_maps; + in_col_stride = (in_col_stride) + ? in_col_stride + : n_input_feature_maps * in_input_feature_map_stride; + in_row_stride = (in_row_stride) + ? in_row_stride + : n_cols * in_col_stride; + + out_row_stride = (out_row_stride) + ? out_row_stride + : n_cols; + out_input_feature_map_stride = (out_input_feature_map_stride) + ? 
out_input_feature_map_stride + : n_rows * out_row_stride; + out_output_feature_map_stride = (out_output_feature_map_stride) + ? out_output_feature_map_stride + : n_input_feature_maps * out_input_feature_map_stride; + + // Perform the re-ordering + for (int i = 0; i < n_rows; i++) + { + const T* const in_row = in + i * in_row_stride; + T* const out_row = out + i * out_row_stride; + + for (int j = 0; j < n_cols; j++) + { + const T* const in_col = in_row + j * in_col_stride; + T* const out_col = out_row + j; + + for (int ifm = 0; ifm < n_input_feature_maps; ifm++) + { + const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; + T* const out_ifm = out_col + ifm * out_input_feature_map_stride; + + for (int ofm = 0; ofm < n_output_feature_maps; ofm++) + { + const T* const in_ofm = in_ifm + ofm; + T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride; + *(out_ofm) = *(in_ofm); + } + } + } + } +} + +} // namespace reorder diff --git a/src/core/NEON/kernels/convolution/common/tensor.hpp b/src/core/NEON/kernels/convolution/common/tensor.hpp new file mode 100644 index 0000000000..7738cdb349 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/tensor.hpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2017-2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#include +#include + +#include "alloc.hpp" + +enum TensorOrder +{ + NHWC, ///< [Batch x Height x Width x Channels] + NCHW, ///< [Batch x Channels x Height x Width] +}; + +struct Tensor4DShape +{ + int n_batches, n_rows, n_cols, n_channels; + TensorOrder ordering; + + // Create a new tensor with the default (NHWC) ordering + inline Tensor4DShape( + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + const TensorOrder ordering=NHWC + ) : n_batches(n_batches), + n_rows(n_rows), + n_cols(n_cols), + n_channels(n_channels), + ordering(ordering) + { + } + + inline int index(const int n, const int i, const int j, const int c) const + { + if (this->ordering == NHWC) + { + return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c; + } + else // NCHW + { + return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j; + } + } + + inline int size() const + { + return n_batches * n_rows * n_cols * n_channels; + } + + inline bool TestEq(const Tensor4DShape& other) const + { + return (n_batches == other.n_batches && + n_rows == other.n_rows && + n_cols == other.n_cols && + n_channels == other.n_channels); + } +}; + + +enum WeightOrder +{ + HWIO, ///< [Height x Width x Input channels x Output channels] + OIHW, ///< [Output channels x Input channels x Height x Width] +}; + +struct KernelShape +{ + int n_output_channels, n_rows, n_cols, n_input_channels; + WeightOrder ordering; + + inline KernelShape( + const int 
n_output_channels, + const int n_rows, + const int n_cols, + const int n_input_channels, + const WeightOrder ordering=HWIO + ) : n_output_channels(n_output_channels), + n_rows(n_rows), + n_cols(n_cols), + n_input_channels(n_input_channels), + ordering(ordering) + { + } + + inline int index(int oc, int i, int j, int ic) const + { + if (this->ordering == HWIO) + { + return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc; + } + else // OIHW + { + return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j; + } + } + + inline int size(void) const + { + return n_output_channels * n_rows * n_cols * n_input_channels; + } +}; + + +template +class Tensor4D final +{ + public: + Tensor4D(ShapeT shape) : + shape(shape), + _data(reinterpret_cast(ALLOCATE(size_bytes()))) + { + Clear(); + } + + Tensor4D(const Tensor4D&) = delete; + Tensor4D operator=(const Tensor4D&) = delete; + + ~Tensor4D() { + free(_data); + } + + inline T* ptr() const { + return _data; + } + + inline size_t size_bytes() const { + return shape.size() * sizeof(T); + } + + /* Extract an element of the tensor. + * + * If the shape is a Tensor4DShape then the index is given as batch, row, + * column and channel. If the shape is a KernelShape then the index is + * given as output channel, row, column and input channel. + */ + inline T& element(const int a, const int b, const int c, const int d) const + { + return _data[shape.index(a, b, c, d)]; + } + + inline void Clear() { + Fill(static_cast(0)); + } + + inline void Fill(T val) { + for (int i = 0; i < shape.size(); i++) + _data[i] = val; + } + + const ShapeT shape; + + private: + T* const _data; +}; diff --git a/src/core/NEON/kernels/convolution/common/tensor_utils.hpp b/src/core/NEON/kernels/convolution/common/tensor_utils.hpp new file mode 100644 index 0000000000..82619f4799 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/tensor_utils.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once +#include "tensor.hpp" + +// Methods to print tensors and weights +void PrintTensor(const Tensor4D& tensor); +void PrintWeights(const Tensor4D& weights); + +// Test the equivalence of two tensors +// Counts the instances that |a - b|/|a| > max_err +bool CmpTensors( + const Tensor4D& a, + const Tensor4D& b, + const float max_err=0.0f +); + +// Fill the tensor with a test pattern +void TestPattern(Tensor4D& tensor); +void TestPattern(Tensor4D& weights); + +// Fill the tensor with random values +void Randomise(Tensor4D& tensor, const int seed=0); +void Randomise(Tensor4D& weights, const int seed=0); diff --git a/src/core/NEON/kernels/convolution/common/utils.hpp b/src/core/NEON/kernels/convolution/common/utils.hpp new file mode 100644 index 0000000000..b7a9517c65 --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/utils.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017-2018 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include + +void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); + +constexpr inline int iceildiv(const int a, const int b) +{ + return (a + b - 1) / b; +} + +template +inline T roundup(const T a, const T b) +{ + return b * iceildiv(a, b); +} + +template +struct TypeBounds +{ + static constexpr T lower() noexcept { return std::numeric_limits::has_infinity + ? -std::numeric_limits::infinity() + : std::numeric_limits::lowest(); }; + static constexpr T upper() noexcept { return std::numeric_limits::has_infinity + ? std::numeric_limits::infinity() + : std::numeric_limits::max(); }; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template<> +struct TypeBounds<__fp16> +{ + static constexpr __fp16 lower() noexcept { return -std::numeric_limits::infinity(); }; + static constexpr __fp16 upper() noexcept { return std::numeric_limits::infinity(); } +}; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp new file mode 100644 index 0000000000..70d6689731 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise.hpp @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2018-2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include +#include "activation.hpp" +#include "padding.hpp" + +namespace depthwise +{ + +namespace nck = neon_convolution_kernels; + +class IDepthwiseConvolution +{ + public: + virtual ~IDepthwiseConvolution() = default; + + virtual int output_size( + int dim_size, + unsigned int padding_before, + unsigned int padding_after + ) const = 0; + + /* Set input tensor and stride. */ + virtual void set_input(const void *inptr) = 0; + virtual void set_input(const void *inptr, int column_stride) = 0; + virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0; + virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0; + + /* Set output tensor and stride. 
*/ + virtual void set_output(void *outptr) = 0; + virtual void set_output(void *outptr, int column_stride) = 0; + virtual void set_output(void *outptr, int row_stride, int column_stride) = 0; + virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0; + + /* Weights and biases are re-ordered to improve memory access patterns. Use + * these methods to determine the size of the re-pack buffer and to set the + * address (and implicitly reorder the weights and biases into) the buffer. + */ + virtual size_t get_packed_params_size(void) const = 0; + virtual void set_packed_params_buffer(void *) = 0; + + virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0; + virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0; + virtual void pack_params( + void *buffer, + const void* weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const = 0; + + /* Working space is used to pad tensors on the fly. Before running any + * inference check the amount of space required, allocate and provide a + * pointer to the convolution engine. 
+ */ + virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0; + virtual void set_working_space(void *) = 0; + + virtual unsigned int get_window(void) const = 0; + virtual void run( + unsigned int start, + unsigned int stop, + unsigned int threadid=0 + ) = 0; +}; + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols, + typename TIn, typename TBias, typename TOut, + typename Derived +> +class DepthwiseConvolutionBase : public IDepthwiseConvolution +{ + public: + // Information about the specific convolution instance + using InputType = TIn; + using BiasType = TBias; + using OutputType = TOut; + static constexpr int output_tile_rows = OutputTileRows; + static constexpr int output_tile_cols = OutputTileCols; + static constexpr int kernel_rows = KernelRows; + static constexpr int kernel_cols = KernelCols; + static constexpr int stride_rows = StrideRows; + static constexpr int stride_cols = StrideCols; + static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows; + static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols; + + /** Create a new depthwise convolution engine. + * + * @param[in] n_batches Number of batches tensors. + * @param[in] n_input_rows Number of rows in input tensor. + * @param[in] n_input_cols Number of columns in input tensor. + * @param[in] n_channels Number of channels in input and output tensors. + */ + DepthwiseConvolutionBase( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + /** Create a new depthwise convolution engine. + * + * @param[in] n_batches Number of batches tensors. + * @param[in] n_input_rows Number of rows in input tensor. 
+ * @param[in] n_input_cols Number of columns in input tensor. + * @param[in] n_channels Number of channels in input and output tensors. + */ + DepthwiseConvolutionBase( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + // Cannot copy or move a DepthwiseConvolution. + DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete; + DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete; + + /* Set input tensor and stride. */ + void set_input(const void *inptr) override; + void set_input(const void *inptr, int column_stride) override; + void set_input(const void *inptr, int row_stride, int column_stride) override; + void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; + + /* Set output tensor and stride. */ + void set_output(void *outptr) override; + void set_output(void *outptr, int column_stride) override; + void set_output(void *outptr, int row_stride, int column_stride) override; + void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; + + /** Get the number of output rows/columns. + * + * @param[in] dim_size Number of elements in the dimension (rows/columns) + * @param[in] padding_before,padding_after Padding applied before and after the dimension (presumably in elements). + */ + static int get_output_size( + int dim_size, unsigned int padding_before, unsigned int padding_after + ); + + int output_size( + int dim_size, unsigned int padding_before, unsigned int padding_after + ) const override; + + /* Determine how much memory is required to store the packed weights and + * biases. + */ + size_t get_packed_params_size(void) const override; + + /* Set the buffer for the packed weights and biases, and perform the + * packing. 
+ */ + void set_packed_params_buffer(void *buffer) override; + + void pack_params(const void *weights, const void *biases=nullptr) const override; + + void pack_params( + void *buffer, + const void *weights, + const void *biases=nullptr + ) const override; + + void pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const override; + + /** Query the amount of working space required. + * @param[in] n_threads The largest number of threads which will be used to execute + * the kernel. + */ + size_t get_working_space_size(unsigned int n_threads=1) const override; + + /** Set the working space buffer. + */ + void set_working_space(void *buffer) override; + + /** Get the window of work to be performed by an instance of the operator. + */ + unsigned int get_window(void) const override; + + /** Perform a portion of the work associated with the operator. + * + * Will perform the window of work described by $[start, stop)$. + * + * @param[in] start Start of the window of work to perform. + * @param[in] stop End (exclusive) of the window of work to perform. + * @param[in] threadid ID of the thread performing the work. + */ + void run( + unsigned int start, + unsigned int stop, + unsigned int threadid=0 + ) override; + + protected: + /** Get the value to use to pad the tensor. + */ + TIn _input_padding_value(void) const; + + /** Implementation of the parameter packing. + */ + void _pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const; + + /** Process a tile-row of the tensors. 
+ */ + void process_tile_row( + unsigned int threadid, + int n_channels, + const void* packed_params, + const InputType* inptr, + OutputType* outptr, + int row_pad_in_top, + int row_pad_in_left, + int row_pad_in_bottom, + int row_pad_out_bottom, + int n_tiles, + int n_input_cols, + int n_output_cols + ); + + /** Process a single tile of the tensor. + * + * This method will apply input/output padding (if required) and call the + * depthwise tile implementation. + */ + void process_tile( + unsigned int threadid, + int n_channels, + const void* packed_params, + const InputType* inptr, + OutputType* outptr, + int pad_in_top, + int pad_in_left, + int pad_in_bottom, + int pad_in_right, + int pad_out_bottom, + int pad_out_right + ); + + /** Perform depthwise convolution on a single tile. + */ + template + void execute_tile( + int n_channels, + const void* packed_params, + const InputType* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + OutputType* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const InputType* inptrs[inner_tile_rows][inner_tile_cols], + OutputType* outptrs[output_tile_rows][output_tile_cols] + ); + + int n_channels(void) const; + + private: + // Member variables of instances of a convolution engine. 
+ const InputType* _input; + OutputType* _output; + void* _packed_parameters; + void* _working_space; // Per-thread working space + const int _n_batches, _n_input_rows, _n_input_cols, _n_channels, + _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols; + const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right; + const nck::ActivationFunction _activation; + + // Stride information for a convolution instance + int _input_col_stride, _input_row_stride, _input_batch_stride; + int _output_col_stride, _output_row_stride, _output_batch_stride; + + // Methods for getting access to working space + size_t _get_input_working_space_size(void) const; + size_t _get_output_working_space_size(void) const; + + void *_get_input_working_space(unsigned int threadid) const; + void *_get_output_working_space(unsigned int threadid) const; +}; + + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols, + typename TIn, typename TBias, typename TOut +> +class DepthwiseConvolution : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut + > +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut + > >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + using Base::DepthwiseConvolutionBase; + + protected: + template + void execute_tile( + int n_channels, + const void* packed_params, + const TIn* inptr, + unsigned int in_row_stride, + unsigned 
int in_col_stride, + TOut* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); +}; + + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float +> : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float + > +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float, float, float + > >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + protected: + template + void execute_tile( + int n_channels, + const void* packed_params, + 
const float* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + float* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + float* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t +> : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t + > +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t, + DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + float16_t, float16_t, float16_t + > >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + 
unsigned int padding_bottom, + unsigned int padding_right + ); + + protected: + template + void execute_tile( + int n_channels, + const void* packed_params, + const float16_t* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + float16_t* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); +}; +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +} // namespace depthwise diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp new file mode 100644 index 0000000000..1bae815613 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include +#include +#include + +#include "depthwise.hpp" + +namespace depthwise +{ + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols, + typename TIn, typename TBias, typename TOut +> +class DilatedDepthwiseConvolution : public IDepthwiseConvolution +{ + public: + /** Create a new dilated depthwise convolution engine. + */ + DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + /** Create a new dilated depthwise convolution engine. + */ + DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + // Cannot copy or move a DilatedDepthwiseConvolution. + DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete; + DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete; + + /* Set input tensor and stride. */ + void set_input(const void *inptr) override; + void set_input(const void *inptr, int column_stride) override; + void set_input(const void *inptr, int row_stride, int column_stride) override; + void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; + + /* Set output tensor and stride. 
*/ + void set_output(void *outptr) override; + void set_output(void *outptr, int column_stride) override; + void set_output(void *outptr, int row_stride, int column_stride) override; + void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; + + static int get_output_size( + int dim_size, + unsigned int padding_before, + unsigned int padding_after, + int dilation_factor + ); + + int output_size( + int dim_size, unsigned int padding_before, unsigned int padding_after + ) const override; + + /* Weights and biases are re-ordered to improve memory access patterns. Use + * these methods to determine the size of the re-pack buffer and to set the + * address (and implicitly reorder the weights and biases into) the buffer. + */ + size_t get_packed_params_size(void) const override; + void set_packed_params_buffer(void *) override; + + void pack_params(const void *weights, const void *biases=nullptr) const override; + void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override; + void pack_params( + void *buffer, + const void* weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const override; + + /* Working space is used to pad tensors on the fly. Before running any + * inference check the amount of space required, allocate and provide a + * pointer to the convolution engine. 
+ */ + size_t get_working_space_size(unsigned int nthreads=1) const override; + void set_working_space(void *) override; + + unsigned int get_window(void) const override; + void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; + + protected: + /** Protected constructor which also accepts a function to construct a new + * subconvolution + */ + DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right, + std::function subconvfn + ); + + const int _dilation_factor; + const int _n_input_rows, _n_input_cols, _n_channels; + const int _padding_top, _padding_left; + const int _n_output_rows, _n_output_cols; + + /* Dilated depthwise convolution is performed through repeated calls to + * non-dilated convolutions. If the dilation factor is $n$, then we perform + * $(n + 1)^2$ depthwise convolutions. + */ + using BaseDepthwise = DepthwiseConvolution< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + TIn, TBias, TOut + >; + std::deque>> _convs; +}; + +} // namespace depthwise diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp new file mode 100644 index 0000000000..4343f6ad45 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2018-2019 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once +#include "depthwise.hpp" +#include "qasymm8.hpp" +#include "qsymm8.hpp" +#pragma once + +using namespace neon_convolution_kernels; +using namespace qasymm8; + +inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b) +{ + return vqrdmulhq_s32(a, b); +} + +inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b) +{ + return vqrdmulhq_n_s32(a, b); +} + +inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b) +{ + return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0); +} + +inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift) +{ + const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); + const int32x4_t fixed = vqaddq_s32(x, fixup); + return vrshlq_s32(fixed, shift); +} + +inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent) +{ + const int32x4_t shift = vdupq_n_s32(-exponent); + const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); + const int32x4_t fixed = vqaddq_s32(x, fixup); + return vrshlq_s32(fixed, shift); +} + +inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent) +{ + const int32x2_t shift = vdup_n_s32(-exponent); + const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31); + const int32x2_t fixed = vqadd_s32(x, fixup); + return vrshl_s32(fixed, shift); +} + +inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent) +{ + const int32x2_t xs = vdup_n_s32(x); + return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0); +} + +namespace depthwise +{ + +namespace nck = neon_convolution_kernels; + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class QAsymm8DepthwiseConvolution : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, 
int32_t, uint8_t, + QAsymm8DepthwiseConvolution +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, int32_t, uint8_t, + QAsymm8DepthwiseConvolution + >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + const qasymm8::QAsymm8RescaleParams& rescale_parameters, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QAsymm8DepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params& weight_quantisation, + const qasymm8::QAsymm8Params& 
input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + const qasymm8::QAsymm8RescaleParams& rescale_parameters, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + protected: + uint8_t _input_padding_value(void) const; + + void _pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const; + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + uint8_t* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); + + private: + // Quantization parameters + const qasymm8::QAsymm8Params _weights_quant, _inputs_quant, _output_quant; + const qasymm8::QAsymm8RescaleParams rescale_parameters; +}; + +template < + unsigned int OutputTileRows, unsigned int OutputTileCols, + unsigned int KernelRows, unsigned int KernelCols, + unsigned int StrideRows, unsigned int StrideCols +> +class QSymm8HybridPerChannelDepthwiseConvolution : public DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, int32_t, uint8_t, + QSymm8HybridPerChannelDepthwiseConvolution +> +{ + using Base = DepthwiseConvolutionBase< + OutputTileRows, OutputTileCols, + KernelRows, KernelCols, + StrideRows, StrideCols, + uint8_t, int32_t, uint8_t, + QSymm8HybridPerChannelDepthwiseConvolution + >; + friend Base; + using InputType = typename Base::InputType; + using OutputType = typename Base::OutputType; + + public: + QSymm8HybridPerChannelDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int 
n_channels, + nck::ActivationFunction activation, + const qsymm8::QSymm8PerChannelParams& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + QSymm8HybridPerChannelDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + nck::ActivationFunction activation, + const qsymm8::QSymm8PerChannelParams& weight_quantisation, + const qasymm8::QAsymm8Params& input_quantisation, + const qasymm8::QAsymm8Params& output_quantisation, + const qsymm8::QSymm8PerChannelRescaleParams& rescale_parameters, + unsigned int padding_top, + unsigned int padding_left, + unsigned int padding_bottom, + unsigned int padding_right + ); + + size_t get_packed_params_size(void) const override + { + return this->n_channels() * (sizeof(int8_t)*KernelRows*KernelCols + 3*sizeof(int32_t)); + + } + + protected: + uint8_t _input_padding_value(void) const; + + void _pack_params( + void *buffer, + const void *weights, + unsigned int weight_row_stride, + unsigned int weight_col_stride, + const void *biases=nullptr + ) const; + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptr, + unsigned int in_row_stride, + unsigned int in_col_stride, + uint8_t* outptr, + unsigned int out_row_stride, + unsigned int out_col_stride + ); + + template + void execute_tile( + int n_channels, + const void* packed_params, + const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], + uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] + ); + + private: + // Quantization parameters + const qsymm8::QSymm8PerChannelParams _weights_quant; + const qasymm8::QAsymm8Params _input_quant, _output_quant; + const qsymm8::QSymm8PerChannelRescaleParams _rescale_parameters; +}; + +} // namespace depthwise diff --git 
a/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp new file mode 100644 index 0000000000..a11b0981c9 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once +#include "depthwise_dilated.hpp" +#include "depthwise_quantized.hpp" + +namespace depthwise { + +template +class QAsymm8DilatedDepthwiseConvolution + : public DilatedDepthwiseConvolution< + OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, + StrideCols, uint8_t, int32_t, uint8_t> { +public: + /** Create a new dilated depthwise convolution engine. 
+ */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); + + /** Create a new dilated depthwise convolution engine. + */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); + + /** Create a new dilated depthwise convolution engine. + */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + const qasymm8::QAsymm8RescaleParams &rescale_parameters, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); + + /** Create a new dilated depthwise convolution engine. 
+ */ + QAsymm8DilatedDepthwiseConvolution( + int n_batches, int n_input_rows, int n_input_cols, int n_channels, + int dilation_factor, int n_output_rows, int n_output_cols, + nck::ActivationFunction activation, + const qasymm8::QAsymm8Params &weight_quantisation, + const qasymm8::QAsymm8Params &input_quantisation, + const qasymm8::QAsymm8Params &output_quantisation, + const qasymm8::QAsymm8RescaleParams& rescale_parameters, + unsigned int padding_top, unsigned int padding_left, + unsigned int padding_bottom, unsigned int padding_right); +}; + +} // namespace depthwise diff --git a/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h index d7ee70a1cd..59f5c6c6b3 100644 --- a/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h +++ b/src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h @@ -25,10 +25,10 @@ #ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H #define ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/utils/misc/Requires.h" +#include "src/core/AccessWindowStatic.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "support/Requires.h" #include -- cgit v1.2.1