From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Oct 2020 00:04:14 +0100 Subject: COMPMID-3638: Move NEON kernels Signed-off-by: Michalis Spyrou Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- .../NEON/functions/NEAbsoluteDifference.cpp | 7 +- src/runtime/NEON/functions/NEAccumulate.cpp | 11 ++- src/runtime/NEON/functions/NEActivationLayer.cpp | 4 +- src/runtime/NEON/functions/NEArgMinMaxLayer.cpp | 3 + .../NEON/functions/NEArithmeticAddition.cpp | 4 +- .../NEON/functions/NEArithmeticSubtraction.cpp | 2 +- .../NEON/functions/NEBatchNormalizationLayer.cpp | 11 ++- src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp | 1 + src/runtime/NEON/functions/NEBitwiseAnd.cpp | 2 +- src/runtime/NEON/functions/NEBitwiseNot.cpp | 2 +- src/runtime/NEON/functions/NEBitwiseOr.cpp | 2 +- src/runtime/NEON/functions/NEBitwiseXor.cpp | 2 +- .../NEON/functions/NEBoundingBoxTransform.cpp | 1 + src/runtime/NEON/functions/NEBox3x3.cpp | 12 ++- src/runtime/NEON/functions/NECannyEdge.cpp | 31 +++--- src/runtime/NEON/functions/NECast.cpp | 2 +- src/runtime/NEON/functions/NEChannelCombine.cpp | 2 +- src/runtime/NEON/functions/NEChannelExtract.cpp | 2 +- .../NEON/functions/NEChannelShuffleLayer.cpp | 2 +- src/runtime/NEON/functions/NECol2Im.cpp | 2 +- src/runtime/NEON/functions/NEColorConvert.cpp | 2 +- src/runtime/NEON/functions/NEComputeAllAnchors.cpp | 1 + src/runtime/NEON/functions/NEConcatenateLayer.cpp | 8 +- .../functions/NEConvertFullyConnectedWeights.cpp | 11 ++- src/runtime/NEON/functions/NEConvolution.cpp | 48 ++++++--- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 21 ++++ src/runtime/NEON/functions/NECopy.cpp | 4 +- src/runtime/NEON/functions/NECropResize.cpp | 3 + .../NEON/functions/NEDeconvolutionLayer.cpp | 1 + 
src/runtime/NEON/functions/NEDepthConvertLayer.cpp | 2 +- src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp | 1 + .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 14 ++- .../NEON/functions/NEDequantizationLayer.cpp | 2 +- src/runtime/NEON/functions/NEDerivative.cpp | 22 +++-- src/runtime/NEON/functions/NEDilate.cpp | 8 +- .../NEON/functions/NEDirectConvolutionLayer.cpp | 23 +++-- .../NEON/functions/NEElementwiseOperators.cpp | 2 +- .../NEON/functions/NEElementwiseUnaryLayer.cpp | 2 +- src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 28 ++++-- src/runtime/NEON/functions/NEErode.cpp | 13 ++- src/runtime/NEON/functions/NEFFT1D.cpp | 21 ++-- src/runtime/NEON/functions/NEFFT2D.cpp | 5 + .../NEON/functions/NEFFTConvolutionLayer.cpp | 7 ++ src/runtime/NEON/functions/NEFastCorners.cpp | 35 ++++--- src/runtime/NEON/functions/NEFill.cpp | 1 + src/runtime/NEON/functions/NEFillBorder.cpp | 9 +- src/runtime/NEON/functions/NEFlattenLayer.cpp | 2 +- src/runtime/NEON/functions/NEFloor.cpp | 2 +- .../NEON/functions/NEFullyConnectedLayer.cpp | 21 +++- .../NEON/functions/NEFuseBatchNormalization.cpp | 11 ++- src/runtime/NEON/functions/NEGEMM.cpp | 32 ++++-- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 33 +++++-- src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp | 2 +- .../NEGEMMLowpAssemblyMatrixMultiplyCore.cpp | 11 ++- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 75 ++++++++------ .../NEON/functions/NEGEMMLowpOutputStage.cpp | 16 ++- src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp | 2 +- src/runtime/NEON/functions/NEGather.cpp | 2 +- src/runtime/NEON/functions/NEGaussian3x3.cpp | 13 ++- src/runtime/NEON/functions/NEGaussian5x5.cpp | 27 ++++-- src/runtime/NEON/functions/NEGaussianPyramid.cpp | 32 +++--- .../NEON/functions/NEGenerateProposalsLayer.cpp | 63 ++++++------ src/runtime/NEON/functions/NEHOGDescriptor.cpp | 21 ++-- src/runtime/NEON/functions/NEHOGDetector.cpp | 7 +- src/runtime/NEON/functions/NEHOGGradient.cpp | 9 +- 
src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 8 +- src/runtime/NEON/functions/NEHarrisCorners.cpp | 22 +++-- src/runtime/NEON/functions/NEHistogram.cpp | 12 ++- src/runtime/NEON/functions/NEIm2Col.cpp | 9 +- .../functions/NEInstanceNormalizationLayer.cpp | 12 ++- src/runtime/NEON/functions/NEIntegralImage.cpp | 13 ++- src/runtime/NEON/functions/NEL2NormalizeLayer.cpp | 11 ++- src/runtime/NEON/functions/NELSTMLayer.cpp | 22 ++++- .../NEON/functions/NELSTMLayerQuantized.cpp | 11 +++ src/runtime/NEON/functions/NELaplacianPyramid.cpp | 9 +- .../NEON/functions/NELaplacianReconstruct.cpp | 6 +- .../NEON/functions/NELocallyConnectedLayer.cpp | 36 ++++--- src/runtime/NEON/functions/NEMagnitude.cpp | 7 +- src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp | 15 ++- src/runtime/NEON/functions/NEMeanStdDev.cpp | 21 ++-- .../functions/NEMeanStdDevNormalizationLayer.cpp | 4 +- src/runtime/NEON/functions/NEMedian3x3.cpp | 13 ++- src/runtime/NEON/functions/NEMinMaxLocation.cpp | 22 +++-- src/runtime/NEON/functions/NENonLinearFilter.cpp | 13 ++- .../NEON/functions/NENonMaximaSuppression3x3.cpp | 14 ++- .../NEON/functions/NENormalizationLayer.cpp | 9 +- src/runtime/NEON/functions/NEOpticalFlow.cpp | 22 +++-- src/runtime/NEON/functions/NEPReluLayer.cpp | 2 +- src/runtime/NEON/functions/NEPadLayer.cpp | 15 ++- src/runtime/NEON/functions/NEPermute.cpp | 2 +- src/runtime/NEON/functions/NEPhase.cpp | 7 +- .../NEON/functions/NEPixelWiseMultiplication.cpp | 2 +- src/runtime/NEON/functions/NEPoolingLayer.cpp | 21 ++-- src/runtime/NEON/functions/NEPriorBoxLayer.cpp | 1 + src/runtime/NEON/functions/NEQLSTMLayer.cpp | 108 ++++++++++++++++----- src/runtime/NEON/functions/NEQuantizationLayer.cpp | 1 + src/runtime/NEON/functions/NERNNLayer.cpp | 20 +++- src/runtime/NEON/functions/NEROIAlignLayer.cpp | 3 +- src/runtime/NEON/functions/NEROIPoolingLayer.cpp | 12 ++- src/runtime/NEON/functions/NERange.cpp | 11 ++- src/runtime/NEON/functions/NEReduceMean.cpp | 3 + 
.../NEON/functions/NEReductionOperation.cpp | 9 +- src/runtime/NEON/functions/NERemap.cpp | 15 +-- src/runtime/NEON/functions/NEReorgLayer.cpp | 2 +- src/runtime/NEON/functions/NEReshapeLayer.cpp | 4 +- src/runtime/NEON/functions/NEReverse.cpp | 2 +- src/runtime/NEON/functions/NEScale.cpp | 1 + src/runtime/NEON/functions/NEScharr3x3.cpp | 8 +- src/runtime/NEON/functions/NESelect.cpp | 2 +- src/runtime/NEON/functions/NESlice.cpp | 2 +- src/runtime/NEON/functions/NESobel3x3.cpp | 13 ++- src/runtime/NEON/functions/NESobel5x5.cpp | 34 ++++--- src/runtime/NEON/functions/NESobel7x7.cpp | 33 ++++--- src/runtime/NEON/functions/NESoftmaxLayer.cpp | 30 ++++-- src/runtime/NEON/functions/NESpaceToBatchLayer.cpp | 27 ++++-- src/runtime/NEON/functions/NESpaceToDepthLayer.cpp | 11 ++- src/runtime/NEON/functions/NEStackLayer.cpp | 9 +- src/runtime/NEON/functions/NEStridedSlice.cpp | 2 +- src/runtime/NEON/functions/NETableLookup.cpp | 2 +- src/runtime/NEON/functions/NEThreshold.cpp | 2 +- src/runtime/NEON/functions/NETile.cpp | 2 +- src/runtime/NEON/functions/NETranspose.cpp | 2 +- src/runtime/NEON/functions/NEUpsampleLayer.cpp | 12 ++- src/runtime/NEON/functions/NEWarpAffine.cpp | 7 +- src/runtime/NEON/functions/NEWarpPerspective.cpp | 12 ++- .../NEON/functions/NEWinogradConvolutionLayer.cpp | 4 + src/runtime/NEON/functions/NEYOLOLayer.cpp | 2 +- 127 files changed, 1090 insertions(+), 458 deletions(-) (limited to 'src/runtime/NEON/functions') diff --git a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp index ec27820126..df2bc7d72e 100644 --- a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp +++ b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" -#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" #include "support/MemorySupport.h" #include -using 
namespace arm_compute; +namespace arm_compute +{ +NEAbsoluteDifference::~NEAbsoluteDifference() = default; void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input2, ITensor *output) { @@ -36,3 +38,4 @@ void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input k->configure(input1, input2, output); _kernel = std::move(k); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEAccumulate.cpp b/src/runtime/NEON/functions/NEAccumulate.cpp index 662f8ccb5b..20eefd9d2d 100644 --- a/src/runtime/NEON/functions/NEAccumulate.cpp +++ b/src/runtime/NEON/functions/NEAccumulate.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/runtime/NEON/functions/NEAccumulate.h" -#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" +#include "src/core/NEON/kernels/NEAccumulateKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; +namespace arm_compute +{ +NEAccumulate::~NEAccumulate() = default; void NEAccumulate::configure(const ITensor *input, ITensor *output) { @@ -37,6 +39,8 @@ void NEAccumulate::configure(const ITensor *input, ITensor *output) _kernel = std::move(k); } +NEAccumulateWeighted::~NEAccumulateWeighted() = default; + void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16) { if(use_fp16) @@ -53,9 +57,12 @@ void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor } } +NEAccumulateSquared::~NEAccumulateSquared() = default; + void NEAccumulateSquared::configure(const ITensor *input, uint32_t shift, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, shift, output); _kernel = std::move(k); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index 7f55edbf70..f9ad298e4d 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp 
@@ -24,16 +24,18 @@ #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Tensor.h" +#include "src/core/NEON/kernels/NEActivationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute { namespace experimental { +NEActivationLayer::~NEActivationLayer() = default; + void NEActivationLayer::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp index 70bbba62ad..2a9bb76c7f 100644 --- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp +++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp @@ -29,11 +29,14 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "support/MemorySupport.h" namespace arm_compute { +NEArgMinMaxLayer::~NEArgMinMaxLayer() = default; + NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr memory_manager) : _reduction_function(support::cpp14::make_unique()) { diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp index 4453a015e8..0bf9a09333 100644 --- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp +++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "support/MemorySupport.h" #include @@ -33,6 +33,8 @@ namespace 
arm_compute { namespace experimental { +NEArithmeticAddition::~NEArithmeticAddition() = default; + void NEArithmeticAddition::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp index 1c95bbfae8..ba3f426269 100644 --- a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp +++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp index eab40ac5be..d0fdfcf101 100644 --- a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp @@ -29,10 +29,13 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEBatchNormalizationLayer::~NEBatchNormalizationLayer() = default; NEBatchNormalizationLayer::NEBatchNormalizationLayer() : _norm_kernel() @@ -43,7 +46,8 @@ void NEBatchNormalizationLayer::configure(ITensor *input, ITensor *output, const ActivationLayerInfo act_info) { // Configure kernel - _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon, act_info); + _norm_kernel = arm_compute::support::cpp14::make_unique(); + _norm_kernel->configure(input, output, mean, var, beta, gamma, 
epsilon, act_info); } Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma, @@ -55,5 +59,6 @@ Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITens void NEBatchNormalizationLayer::run() { - NEScheduler::get().schedule(&_norm_kernel, Window::DimY); + NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp index 2705cffe68..77a63c0f63 100644 --- a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEBitwiseAnd.cpp b/src/runtime/NEON/functions/NEBitwiseAnd.cpp index 1d89308565..f3b5220ccf 100644 --- a/src/runtime/NEON/functions/NEBitwiseAnd.cpp +++ b/src/runtime/NEON/functions/NEBitwiseAnd.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "src/core/NEON/kernels/NEBitwiseAndKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBitwiseNot.cpp b/src/runtime/NEON/functions/NEBitwiseNot.cpp index 585b059005..036584ea1a 100644 --- a/src/runtime/NEON/functions/NEBitwiseNot.cpp +++ b/src/runtime/NEON/functions/NEBitwiseNot.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "src/core/NEON/kernels/NEBitwiseNotKernel.h" #include "support/MemorySupport.h" #include diff --git 
a/src/runtime/NEON/functions/NEBitwiseOr.cpp b/src/runtime/NEON/functions/NEBitwiseOr.cpp index bba866d97a..fc905a0919 100644 --- a/src/runtime/NEON/functions/NEBitwiseOr.cpp +++ b/src/runtime/NEON/functions/NEBitwiseOr.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "src/core/NEON/kernels/NEBitwiseOrKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBitwiseXor.cpp b/src/runtime/NEON/functions/NEBitwiseXor.cpp index 188fe3d9ef..301a0c4659 100644 --- a/src/runtime/NEON/functions/NEBitwiseXor.cpp +++ b/src/runtime/NEON/functions/NEBitwiseXor.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "src/core/NEON/kernels/NEBitwiseXorKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp b/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp index b1ecfaf314..0b639430b1 100644 --- a/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp +++ b/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" +#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEBox3x3.cpp b/src/runtime/NEON/functions/NEBox3x3.cpp index a380377daa..01d2356a4c 100644 --- a/src/runtime/NEON/functions/NEBox3x3.cpp +++ b/src/runtime/NEON/functions/NEBox3x3.cpp @@ -23,14 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NEBox3x3.h" -#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEBox3x3Kernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16) { if(use_fp16) @@ -45,5 +46,8 @@ void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); } - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp index d7ec52c5ac..bf4f7d7933 100644 --- a/src/runtime/NEON/functions/NECannyEdge.cpp +++ b/src/runtime/NEON/functions/NECannyEdge.cpp @@ -25,8 +25,6 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include 
"arm_compute/runtime/NEON/NEScheduler.h" @@ -34,13 +32,19 @@ #include "arm_compute/runtime/NEON/functions/NESobel5x5.h" #include "arm_compute/runtime/NEON/functions/NESobel7x7.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NECannyEdgeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" #include "support/MemorySupport.h" #include #include #include -using namespace arm_compute; +namespace arm_compute +{ +NECannyEdge::~NECannyEdge() = default; NECannyEdge::NECannyEdge(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -139,21 +143,25 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, _memory_group.manage(&_nonmax); // Configure non-maxima suppression - _non_max_suppr.configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED); + _non_max_suppr = arm_compute::support::cpp14::make_unique(); + _non_max_suppr->configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED); // Fill border around magnitude image as non-maxima suppression will access // it. If border mode is undefined filling the border is a nop. 
- _border_mag_gradient.configure(&_magnitude, _non_max_suppr.border_size(), border_mode, constant_border_value); + _border_mag_gradient = arm_compute::support::cpp14::make_unique(); + _border_mag_gradient->configure(&_magnitude, _non_max_suppr->border_size(), border_mode, constant_border_value); // Allocate intermediate tensors _phase.allocator()->allocate(); _magnitude.allocator()->allocate(); // Configure edge tracing - _edge_trace.configure(&_nonmax, output); + _edge_trace = arm_compute::support::cpp14::make_unique(); + _edge_trace->configure(&_nonmax, output); // Fill border with "No edge" to stop recursion in edge trace - _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, static_cast(0.f)); + _border_edge_trace = arm_compute::support::cpp14::make_unique(); + _border_edge_trace->configure(&_nonmax, _edge_trace->border_size(), BorderMode::CONSTANT, static_cast(0.f)); // Allocate intermediate tensors _nonmax.allocator()->allocate(); @@ -172,17 +180,18 @@ void NECannyEdge::run() NEScheduler::get().schedule(_gradient.get(), Window::DimY); // Fill border before non-maxima suppression. Nop for border mode undefined. 
- NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ); + NEScheduler::get().schedule(_border_mag_gradient.get(), Window::DimZ); // Run non-maxima suppression - NEScheduler::get().schedule(&_non_max_suppr, Window::DimY); + NEScheduler::get().schedule(_non_max_suppr.get(), Window::DimY); ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); std::fill_n(_output->buffer(), _output->info()->total_size(), 0); // Fill border before edge trace - NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ); + NEScheduler::get().schedule(_border_edge_trace.get(), Window::DimZ); // Run edge tracing - NEScheduler::get().schedule(&_edge_trace, Window::DimY); + NEScheduler::get().schedule(_edge_trace.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NECast.cpp b/src/runtime/NEON/functions/NECast.cpp index 4b35110417..7fd2605fd2 100644 --- a/src/runtime/NEON/functions/NECast.cpp +++ b/src/runtime/NEON/functions/NECast.cpp @@ -24,8 +24,8 @@ #include "arm_compute/runtime/NEON/functions/NECast.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "arm_compute/core/TensorInfo.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEChannelCombine.cpp b/src/runtime/NEON/functions/NEChannelCombine.cpp index e987951097..f8a9be0313 100644 --- a/src/runtime/NEON/functions/NEChannelCombine.cpp +++ b/src/runtime/NEON/functions/NEChannelCombine.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" -#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" +#include "src/core/NEON/kernels/NEChannelCombineKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEChannelExtract.cpp b/src/runtime/NEON/functions/NEChannelExtract.cpp index d78a8f8301..8f5e4d47d9 100644 --- 
a/src/runtime/NEON/functions/NEChannelExtract.cpp +++ b/src/runtime/NEON/functions/NEChannelExtract.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" -#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" +#include "src/core/NEON/kernels/NEChannelExtractKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp b/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp index 0392a92663..c72dec67ee 100644 --- a/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp +++ b/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h" -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NECol2Im.cpp b/src/runtime/NEON/functions/NECol2Im.cpp index e4fe36fd25..0706125157 100644 --- a/src/runtime/NEON/functions/NECol2Im.cpp +++ b/src/runtime/NEON/functions/NECol2Im.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NECol2Im.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEColorConvert.cpp b/src/runtime/NEON/functions/NEColorConvert.cpp index 7befac7aa3..ebdd1046ce 100644 --- a/src/runtime/NEON/functions/NEColorConvert.cpp +++ b/src/runtime/NEON/functions/NEColorConvert.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEColorConvert.h" -#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" +#include "src/core/NEON/kernels/NEColorConvertKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEComputeAllAnchors.cpp 
b/src/runtime/NEON/functions/NEComputeAllAnchors.cpp index cb89117ff9..3f5712dd3a 100644 --- a/src/runtime/NEON/functions/NEComputeAllAnchors.cpp +++ b/src/runtime/NEON/functions/NEComputeAllAnchors.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h" +#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 72bd9e6b19..03a01aec6b 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -23,10 +23,10 @@ */ #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" -#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" diff --git a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp index f697efb367..291afe0273 100644 --- a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp +++ b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,13 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEConvertFullyConnectedWeights::~NEConvertFullyConnectedWeights() = default; + NEConvertFullyConnectedWeights::NEConvertFullyConnectedWeights() : _kernel() { @@ -33,7 +37,8 @@ NEConvertFullyConnectedWeights::NEConvertFullyConnectedWeights() void NEConvertFullyConnectedWeights::configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout) { - _kernel.configure(input, output, original_input_shape, data_layout); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(input, output, original_input_shape, data_layout); } Status NEConvertFullyConnectedWeights::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, @@ -44,6 +49,6 @@ Status NEConvertFullyConnectedWeights::validate(const ITensorInfo *input, const void NEConvertFullyConnectedWeights::run() { - NEScheduler::get().schedule(&_kernel, Window::DimZ); + NEScheduler::get().schedule(_kernel.get(), Window::DimZ); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp index 8200a08ca8..07ac8bd42b 100644 --- a/src/runtime/NEON/functions/NEConvolution.cpp +++ b/src/runtime/NEON/functions/NEConvolution.cpp @@ -25,28 +25,38 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" +#include 
"src/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include #include -using namespace arm_compute; +namespace arm_compute +{ +NEConvolution3x3::~NEConvolution3x3() = default; void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +template +NEConvolutionSquare::~NEConvolutionSquare() = default; + template NEConvolutionSquare::NEConvolutionSquare(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler() @@ -66,6 +76,7 @@ void NEConvolutionSquare::configure(ITensor *input, ITensor *output _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size); + auto b = arm_compute::support::cpp14::make_unique(); if(_is_separable) { DataType intermediate_type = DataType::UNKNOWN; @@ -82,35 +93,40 @@ void NEConvolutionSquare::configure(ITensor *input, ITensor *output scale = calculate_matrix_scale(conv, matrix_size); } - _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); - _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); + _kernel_hor = arm_compute::support::cpp14::make_unique>(); + _kernel_vert = arm_compute::support::cpp14::make_unique>(); + + _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == 
BorderMode::UNDEFINED); + _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); _tmp.allocator()->allocate(); - _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } else { - _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); - _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value)); + _kernel = arm_compute::support::cpp14::make_unique>(); + _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } + _border_handler = std::move(b); } template void NEConvolutionSquare::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); if(_is_separable) { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_kernel_hor, Window::DimY); - NEScheduler::get().schedule(&_kernel_vert, Window::DimY); + NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY); } else { - NEScheduler::get().schedule(&_kernel, Window::DimY); + NEScheduler::get().schedule(_kernel.get(), Window::DimY); } } @@ -118,10 +134,16 @@ template class arm_compute::NEConvolutionSquare<5>; template class arm_compute::NEConvolutionSquare<7>; template class arm_compute::NEConvolutionSquare<9>; +NEConvolutionRectangle::~NEConvolutionRectangle() = default; + void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, conv, rows, 
cols, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index 491425c487..901b1e880e 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -27,6 +27,27 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" +#include 
"src/core/NEON/kernels/NEReductionOperationKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NECopy.cpp b/src/runtime/NEON/functions/NECopy.cpp index a461c18894..9e7bf40559 100644 --- a/src/runtime/NEON/functions/NECopy.cpp +++ b/src/runtime/NEON/functions/NECopy.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NECopy.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" #include "support/MemorySupport.h" #include namespace arm_compute { +NECopy::~NECopy() = default; + void NECopy::configure(ITensor *input, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp index f8f99169aa..2e2d2251b6 100644 --- a/src/runtime/NEON/functions/NECropResize.cpp +++ b/src/runtime/NEON/functions/NECropResize.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NECropResize.h" +#include "src/core/NEON/kernels/NECropKernel.h" #include "support/MemorySupport.h" @@ -31,6 +32,8 @@ namespace arm_compute { +NECropResize::~NECropResize() = default; + NECropResize::NECropResize() : _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _crop(), _scale(), _crop_results(), _scaled_results() { diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index cb9ab168a7..2b5b0082c4 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include 
"src/core/helpers/AutoConfiguration.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp index 1ffcca0d7f..af0f5efb69 100644 --- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp index 0aaa37ec92..c4f15e3b68 100644 --- a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 6c22523bcb..fc97279211 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute::misc; using namespace arm_compute::misc::shape_calculator; @@ -69,10 +71,11 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo } } // namespace +NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default; + 
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr memory_manager) - : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), - _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), - _is_activationlayer_enabled(false), _is_prepared(false) + : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(), + _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false) { } @@ -243,7 +246,8 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure( } _original_weights = weights_to_use; - _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation); + _depthwise_conv_kernel = arm_compute::support::cpp14::make_unique(); + _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation); if(_is_nchw) { @@ -309,7 +313,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run() _permute_input.run(); } - NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY); + NEScheduler::get().schedule(_depthwise_conv_kernel.get(), Window::DimY); if(_is_nchw) { diff --git a/src/runtime/NEON/functions/NEDequantizationLayer.cpp b/src/runtime/NEON/functions/NEDequantizationLayer.cpp index a4a3a43b2e..0c0f86c82b 100644 --- a/src/runtime/NEON/functions/NEDequantizationLayer.cpp +++ 
b/src/runtime/NEON/functions/NEDequantizationLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp index 24991400b8..f007e9fda3 100644 --- a/src/runtime/NEON/functions/NEDerivative.cpp +++ b/src/runtime/NEON/functions/NEDerivative.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,16 @@ #include "arm_compute/runtime/NEON/functions/NEDerivative.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEDerivative::~NEDerivative() = default; NEDerivative::NEDerivative() : _kernel(), _border_handler() @@ -41,12 +45,16 @@ void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - _kernel.configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); - _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value)); + _kernel = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + + _kernel->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _border_handler->configure(input, 
BorderSize(1), border_mode, PixelValue(constant_border_value)); } void NEDerivative::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); - NEScheduler::get().schedule(&_kernel, Window::DimY); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); + NEScheduler::get().schedule(_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEDilate.cpp b/src/runtime/NEON/functions/NEDilate.cpp index 7f503865b4..70c0b61639 100644 --- a/src/runtime/NEON/functions/NEDilate.cpp +++ b/src/runtime/NEON/functions/NEDilate.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/NEON/functions/NEDilate.h" -#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEDilateKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include @@ -36,5 +37,8 @@ void NEDilate::configure(ITensor *input, ITensor *output, BorderMode border_mode auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index fe545905d5..98d6386ffe 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -27,9 +27,15 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include 
"src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEDirectConvolutionLayer::~NEDirectConvolutionLayer() = default; + NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false), _is_activationlayer_enabled(false), _dim_split(Window::DimZ), _is_padding_required() @@ -39,6 +45,9 @@ NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptrinfo()->data_layout() == DataLayout::UNKNOWN); + _output_stage_kernel = arm_compute::support::cpp14::make_unique(); + _conv_kernel = arm_compute::support::cpp14::make_unique(); + _input_border_handler = arm_compute::support::cpp14::make_unique(); // Free accumulator if(_accumulator.buffer() != nullptr) @@ -51,17 +60,17 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, // Check if bias should be added in the convolution result _has_bias = (bias != nullptr); - _conv_kernel.configure(input, weights, output, conv_info); + _conv_kernel->configure(input, weights, output, conv_info); if(_has_bias) { - _output_stage_kernel.configure(output, bias); + _output_stage_kernel->configure(output, bias); } - _is_padding_required = !_conv_kernel.border_size().empty(); + _is_padding_required = !_conv_kernel->border_size().empty(); if(_is_padding_required) { // Add zero padding XY - _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); + _input_border_handler->configure(input, _conv_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); } //Configure Activation Layer @@ -109,12 +118,12 @@ void NEDirectConvolutionLayer::run() if(_is_padding_required) { - NEScheduler::get().schedule(&_input_border_handler, 
Window::DimZ); + NEScheduler::get().schedule(_input_border_handler.get(), Window::DimZ); } - NEScheduler::get().schedule(&_conv_kernel, _dim_split); + NEScheduler::get().schedule(_conv_kernel.get(), _dim_split); if(_has_bias) { - NEScheduler::get().schedule(&_output_stage_kernel, Window::DimY); + NEScheduler::get().schedule(_output_stage_kernel.get(), Window::DimY); } if(_is_activationlayer_enabled) diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp index d1f60c71e1..7f3fe8b30b 100644 --- a/src/runtime/NEON/functions/NEElementwiseOperators.cpp +++ b/src/runtime/NEON/functions/NEElementwiseOperators.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h" -#include +#include #include "arm_compute/core/ITensor.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp index cb4e3a0b7d..5e130205d2 100644 --- a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp +++ b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" +#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp index b3d5ad484f..d3ff171323 100644 --- a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp +++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,8 +28,14 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEEqualizeHistogram::~NEEqualizeHistogram() = default; NEEqualizeHistogram::NEEqualizeHistogram() : _histogram_kernel(), _cd_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8) @@ -43,20 +50,25 @@ void NEEqualizeHistogram::configure(const IImage *input, IImage *output) ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + _histogram_kernel = arm_compute::support::cpp14::make_unique(); + _cd_histogram_kernel = arm_compute::support::cpp14::make_unique(); + _map_histogram_kernel = arm_compute::support::cpp14::make_unique(); + // Configure kernels - _histogram_kernel.configure(input, &_hist); - _cd_histogram_kernel.configure(input, &_hist, &_cum_dist, &_cd_lut); - _map_histogram_kernel.configure(input, &_cd_lut, output); + _histogram_kernel->configure(input, &_hist); + _cd_histogram_kernel->configure(input, &_hist, &_cum_dist, &_cd_lut); + _map_histogram_kernel->configure(input, &_cd_lut, output); } void NEEqualizeHistogram::run() { // Calculate histogram of input. - NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY); // Calculate cumulative distribution of histogram and create LUT. 
- NEScheduler::get().schedule(&_cd_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_cd_histogram_kernel.get(), Window::DimY); // Map input to output using created LUT. - NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_map_histogram_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEErode.cpp b/src/runtime/NEON/functions/NEErode.cpp index a89993c1fe..748694fe3f 100644 --- a/src/runtime/NEON/functions/NEErode.cpp +++ b/src/runtime/NEON/functions/NEErode.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NEErode.h" -#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEErodeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEErode::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFFT1D.cpp b/src/runtime/NEON/functions/NEFFT1D.cpp index 2c53b185df..b94c25832a 100644 --- a/src/runtime/NEON/functions/NEFFT1D.cpp +++ b/src/runtime/NEON/functions/NEFFT1D.cpp @@ -26,10 +26,16 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" 
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" #include "src/core/utils/helpers/fft.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEFFT1D::~NEFFT1D() = default; + NEFFT1D::NEFFT1D(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _axis(0), _run_scale(false) { @@ -58,7 +64,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32); _digit_reverse_indices.allocator()->init(digit_reverse_indices_info); _memory_group.manage(&_digit_reversed_input); - _digit_reverse_kernel.configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config); + _digit_reverse_kernel = arm_compute::support::cpp14::make_unique(); + _digit_reverse_kernel->configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config); // Create and configure FFT kernels unsigned int Nx = 1; @@ -75,7 +82,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & fft_kernel_info.radix = radix_for_stage; fft_kernel_info.Nx = Nx; fft_kernel_info.is_first_stage = (i == 0); - _fft_kernels[i].configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info); + _fft_kernels[i] = arm_compute::support::cpp14::make_unique(); + _fft_kernels[i]->configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info); Nx *= radix_for_stage; } @@ -86,7 +94,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & FFTScaleKernelInfo scale_config; scale_config.scale = static_cast(N); scale_config.conjugate = config.direction == FFTDirection::Inverse; - is_c2r ? 
_scale_kernel.configure(&_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config); + _scale_kernel = arm_compute::support::cpp14::make_unique(); + is_c2r ? _scale_kernel->configure(&_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config); } // Allocate tensors @@ -128,17 +137,17 @@ void NEFFT1D::run() { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_digit_reverse_kernel, (_axis == 0 ? Window::DimY : Window::DimZ)); + NEScheduler::get().schedule(_digit_reverse_kernel.get(), (_axis == 0 ? Window::DimY : Window::DimZ)); for(unsigned int i = 0; i < _num_ffts; ++i) { - NEScheduler::get().schedule(&_fft_kernels[i], (_axis == 0 ? Window::DimY : Window::DimX)); + NEScheduler::get().schedule(_fft_kernels[i].get(), (_axis == 0 ? Window::DimY : Window::DimX)); } // Run output scaling if(_run_scale) { - NEScheduler::get().schedule(&_scale_kernel, Window::DimY); + NEScheduler::get().schedule(_scale_kernel.get(), Window::DimY); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFFT2D.cpp b/src/runtime/NEON/functions/NEFFT2D.cpp index b63afe59c0..3b787cd523 100644 --- a/src/runtime/NEON/functions/NEFFT2D.cpp +++ b/src/runtime/NEON/functions/NEFFT2D.cpp @@ -26,9 +26,14 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Scheduler.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" namespace arm_compute { +NEFFT2D::~NEFFT2D() = default; + NEFFT2D::NEFFT2D(std::shared_ptr memory_manager) : _memory_group(memory_manager), _first_pass_func(memory_manager), _second_pass_func(memory_manager), _first_pass_tensor() { diff --git a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp index a46fc9f45f..23788b7c39 
100644 --- a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp @@ -27,6 +27,12 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/utils/helpers/fft.h" @@ -96,6 +102,7 @@ NEFFTConvolutionLayer::NEFFTConvolutionLayer(std::shared_ptr mem _is_prepared(false) { } +NEFFTConvolutionLayer::~NEFFTConvolutionLayer() = default; void NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp index 303c593f84..1bde3cc508 100644 --- a/src/runtime/NEON/functions/NEFastCorners.cpp +++ b/src/runtime/NEON/functions/NEFastCorners.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,15 +25,21 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFastCornersKernel.h" +#include "src/core/NEON/kernels/NEFillArrayKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEFastCorners::~NEFastCorners() = default; NEFastCorners::NEFastCorners(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), @@ -62,24 +68,28 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre _output.allocator()->init(tensor_info); _memory_group.manage(&_output); + _fast_corners_kernel = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + _fill_kernel = arm_compute::support::cpp14::make_unique(); // If border is UNDEFINED _fast_corners_kernel will operate in xwindow (3, // width - 3) and ywindow (3, height -3) so the output image will leave the // pixels on the borders unchanged. This is reflected in the valid region // of the output. The non maxima suppression is only run on the valid // pixels. 
- _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode); - _border_handler.configure(input, _fast_corners_kernel.border_size(), border_mode, constant_border_value); + _fast_corners_kernel->configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode); + _border_handler->configure(input, _fast_corners_kernel->border_size(), border_mode, constant_border_value); if(!_non_max) { - _fill_kernel.configure(&_output, 1 /* we keep all texels >0 */, corners); + _fill_kernel->configure(&_output, 1 /* we keep all texels >0 */, corners); } else { _suppressed.allocator()->init(tensor_info); _memory_group.manage(&_suppressed); - _nonmax_kernel.configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode); - _fill_kernel.configure(&_suppressed, 1 /* we keep all texels >0 */, corners); + _nonmax_kernel = arm_compute::support::cpp14::make_unique(); + _nonmax_kernel->configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode); + _fill_kernel->configure(&_suppressed, 1 /* we keep all texels >0 */, corners); // Allocate intermediate tensors _suppressed.allocator()->allocate(); @@ -91,16 +101,17 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre void NEFastCorners::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY); + NEScheduler::get().schedule(_fast_corners_kernel.get(), Window::DimY); if(_non_max) { - NEScheduler::get().schedule(&_nonmax_kernel, Window::DimY); + NEScheduler::get().schedule(_nonmax_kernel.get(), Window::DimY); } - NEScheduler::get().schedule(&_fill_kernel, Window::DimY); + NEScheduler::get().schedule(_fill_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFill.cpp 
b/src/runtime/NEON/functions/NEFill.cpp index 79fe175e69..68292c9ee0 100644 --- a/src/runtime/NEON/functions/NEFill.cpp +++ b/src/runtime/NEON/functions/NEFill.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEFillBorder.cpp b/src/runtime/NEON/functions/NEFillBorder.cpp index de2ef26b80..e96069f97c 100644 --- a/src/runtime/NEON/functions/NEFillBorder.cpp +++ b/src/runtime/NEON/functions/NEFillBorder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,16 +25,19 @@ #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value) { - _border_handler.configure(input, BorderSize(border_width), border_mode, constant_border_value); + _border_handler = arm_compute::support::cpp14::make_unique(); + _border_handler->configure(input, BorderSize(border_width), border_mode, constant_border_value); } void NEFillBorder::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp index 936a70dacc..4dfe96325e 100644 --- a/src/runtime/NEON/functions/NEFlattenLayer.cpp +++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" #include 
"arm_compute/core/Size2D.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFloor.cpp b/src/runtime/NEON/functions/NEFloor.cpp index 95b2497ded..5f6bd61017 100644 --- a/src/runtime/NEON/functions/NEFloor.cpp +++ b/src/runtime/NEON/functions/NEFloor.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEFloor.h" -#include "arm_compute/core/NEON/kernels/NEFloorKernel.h" +#include "src/core/NEON/kernels/NEFloorKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index d956d16f4d..714fa58a66 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -29,6 +29,18 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" #include "support/MemorySupport.h" @@ -145,6 +158,8 @@ Status 
NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c return NETransposeKernel::validate(input, output); } +NEFullyConnectedLayer::~NEFullyConnectedLayer() = default; + NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(), _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), @@ -199,7 +214,9 @@ void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITenso // Configure flatten kernel _memory_group.manage(&_flatten_output); - _flatten_kernel.configure(input, &_flatten_output); + + _flatten_kernel = arm_compute::support::cpp14::make_unique(); + _flatten_kernel->configure(input, &_flatten_output); // Configure matrix multiply kernel configure_mm(&_flatten_output, weights, biases, output, act); @@ -398,7 +415,7 @@ void NEFullyConnectedLayer::run() // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { - NEScheduler::get().schedule(&_flatten_kernel, Window::DimY); + NEScheduler::get().schedule(_flatten_kernel.get(), Window::DimY); } // Run matrix multiply diff --git a/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp b/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp index fd26bb49a7..c64fde050e 100644 --- a/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp +++ b/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,9 +28,13 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEFuseBatchNormalization::~NEFuseBatchNormalization() = default; + NEFuseBatchNormalization::NEFuseBatchNormalization() : _fuse_bn_kernel() { @@ -41,7 +45,8 @@ void NEFuseBatchNormalization::configure(const ITensor *input_weights, const ITe const ITensor *input_bias, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, FuseBatchNormalizationType fbn_type) { - _fuse_bn_kernel.configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); + _fuse_bn_kernel = arm_compute::support::cpp14::make_unique(); + _fuse_bn_kernel->configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); } Status NEFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, @@ -54,6 +59,6 @@ Status NEFuseBatchNormalization::validate(const ITensorInfo *input_weights, cons void NEFuseBatchNormalization::run() { - NEScheduler::get().schedule(&_fuse_bn_kernel, Window::DimY); + NEScheduler::get().schedule(_fuse_bn_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 4166cff97a..0215098792 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -34,7 +34,12 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include 
"src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" #include @@ -42,6 +47,8 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { +NEGEMM::~NEGEMM() = default; + NEGEMM::NEGEMM(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), @@ -88,11 +95,13 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _memory_group.manage(&_tmp_d); } + _mm_kernel = arm_compute::support::cpp14::make_unique(); + // Select between GEMV and GEMM if(_run_vector_matrix_multiplication) { // Configure the matrix multiply kernel - _mm_kernel.configure(a, b, gemm_output_to_use, alpha, false); + _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false); } else { @@ -124,13 +133,15 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe int k = a->info()->dimension(0); // Configure interleave kernel - _interleave_kernel.configure(a, &_tmp_a); + _interleave_kernel = arm_compute::support::cpp14::make_unique(); + _interleave_kernel->configure(a, &_tmp_a); // Configure transpose kernel - _transpose_kernel.configure(b, &_tmp_b); + _transpose_kernel = arm_compute::support::cpp14::make_unique(); + _transpose_kernel->configure(b, &_tmp_b); // Configure matrix multiplication kernel - _mm_kernel.configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); + _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); // Allocate once the all 
configure methods have been called _tmp_a.allocator()->allocate(); @@ -150,7 +161,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe // Configure matrix addition kernel if(_run_addition) { - _ma_kernel.configure(c, d, beta); + _ma_kernel = arm_compute::support::cpp14::make_unique(); + _ma_kernel->configure(c, d, beta); } // Configure activation @@ -298,16 +310,16 @@ void NEGEMM::run() if(!_run_vector_matrix_multiplication) { // Run interleave kernel - NEScheduler::get().schedule(&_interleave_kernel, Window::DimY); + NEScheduler::get().schedule(_interleave_kernel.get(), Window::DimY); if(!_reshape_b_only_on_first_run) { // Run transpose kernel - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); } } - NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); + NEScheduler::get().schedule(_mm_kernel.get(), _run_vector_matrix_multiplication ? 
Window::DimX : Window::DimY); // Run bias addition kernel if(_run_bias_addition) @@ -319,7 +331,7 @@ void NEGEMM::run() // Run matrix addition kernel if(_run_addition) { - NEScheduler::get().schedule(&_ma_kernel, Window::DimY); + NEScheduler::get().schedule(_ma_kernel.get(), Window::DimY); } // Run activation function @@ -355,7 +367,7 @@ void NEGEMM::prepare() } _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 834a66a867..3f50f81af2 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -30,6 +30,21 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "support/MemorySupport.h" + #include #include @@ -37,6 +52,7 @@ namespace arm_compute { using namespace arm_compute::misc::shape_calculator; 
+NEConvolutionLayerReshapeWeights::~NEConvolutionLayerReshapeWeights() = default; NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() : _weights_reshape_kernel() { @@ -52,7 +68,8 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type()); const ITensor *biases_to_use = (append_biases) ? biases : nullptr; - _weights_reshape_kernel.configure(weights, biases_to_use, output); + _weights_reshape_kernel = arm_compute::support::cpp14::make_unique(); + _weights_reshape_kernel->configure(weights, biases_to_use, output); output->info()->set_quantization_info(weights->info()->quantization_info()); } @@ -86,9 +103,11 @@ Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co void NEConvolutionLayerReshapeWeights::run() { - NEScheduler::get().schedule(&_weights_reshape_kernel, 3); + NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3); } +NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default; + NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr &memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false), @@ -323,7 +342,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig _memory_group.manage(&_im2col_output); // Configure - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation); + _im2col_kernel = arm_compute::support::cpp14::make_unique(); + _im2col_kernel->configure(input, &_im2col_output, Size2D(kernel_width, 
kernel_height), conv_info, false, dilation); // Update GEMM input gemm_input_to_use = &_im2col_output; @@ -365,7 +385,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig if(_data_layout == DataLayout::NCHW) { // Configure col2im - _col2im_kernel.configure(gemm_output_to_use, output, Size2D(conv_w, conv_h)); + _col2im_kernel = arm_compute::support::cpp14::make_unique(); + _col2im_kernel->configure(gemm_output_to_use, output, Size2D(conv_w, conv_h)); } else { @@ -538,7 +559,7 @@ void NEGEMMConvolutionLayer::run() { // Run input reshaping unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); - NEScheduler::get().schedule(&_im2col_kernel, y_dim); + NEScheduler::get().schedule(_im2col_kernel.get(), y_dim); } // Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions @@ -558,7 +579,7 @@ void NEGEMMConvolutionLayer::run() { if(_data_layout == DataLayout::NCHW) { - NEScheduler::get().schedule(&_col2im_kernel, Window::DimY); + NEScheduler::get().schedule(_col2im_kernel.get(), Window::DimY); } else { diff --git a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp index ad306c3662..70fdcf492d 100644 --- a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp +++ b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp index 6d52f2b15c..09637dd2d6 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp @@ -26,17 +26,19 @@ #include 
"arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEGEMMLowpAssemblyMatrixMultiplyCore::~NEGEMMLowpAssemblyMatrixMultiplyCore() = default; NEGEMMLowpAssemblyMatrixMultiplyCore::NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr memory_manager) : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _tmp_a(), _tmp_b() @@ -137,3 +139,4 @@ void NEGEMMLowpAssemblyMatrixMultiplyCore::run() NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY); } } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 36357dde41..9050427b34 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -34,12 +34,23 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/helpers/AutoConfiguration.h" + +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include 
"src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" + #include "support/MemorySupport.h" namespace arm_compute { using namespace arm_compute::misc::shape_calculator; +NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default; + NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(), @@ -80,7 +91,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, _signed_a.allocator()->init(a_to_use->info()->clone()->set_data_type(dt).set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction))); _memory_group.manage(&_signed_a); - _convert_to_signed_asymm.configure(a_to_use, &_signed_a); + _convert_to_signed_asymm = arm_compute::support::cpp14::make_unique(); + _convert_to_signed_asymm->configure(a_to_use, &_signed_a); a_to_use = &_signed_a; _a_offset = _signed_a.info()->quantization_info().uniform().offset; @@ -153,10 +165,12 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, } // Configure interleave kernel - _mtx_a_reshape_kernel.configure(a_to_use, &_tmp_a); + _mtx_a_reshape_kernel = arm_compute::support::cpp14::make_unique(); + _mtx_a_reshape_kernel->configure(a_to_use, &_tmp_a); // Configure transpose kernel - _mtx_b_reshape_kernel.configure(b, &_tmp_b); + _mtx_b_reshape_kernel 
= arm_compute::support::cpp14::make_unique(); + _mtx_b_reshape_kernel->configure(b, &_tmp_b); } if(!_fused_assembly_path) @@ -176,7 +190,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, } // Configure Matrix B reduction kernel - _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, reduction_info); + _mtx_b_reduction_kernel = arm_compute::support::cpp14::make_unique(); + _mtx_b_reduction_kernel->configure(b, &_vector_sum_col, reduction_info); } // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 @@ -188,7 +203,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, _memory_group.manage(&_vector_sum_row); // Configure matrix A reduction kernel - _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, reduction_info); + _mtx_a_reduction_kernel = arm_compute::support::cpp14::make_unique(); + _mtx_a_reduction_kernel->configure(a_to_use, &_vector_sum_row, reduction_info); } if(_fuse_output_stage) @@ -196,19 +212,22 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, // Configure matrix multiply kernel if(!_assembly_path) { - _mm_kernel.configure(matrix_a, matrix_b, &_mm_result_s32); + _mm_kernel = arm_compute::support::cpp14::make_unique(); + _mm_kernel->configure(matrix_a, matrix_b, &_mm_result_s32); } - _offset_contribution_output_stage_kernel.configure(&_mm_result_s32, - _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, c, - _flip_signedness ? &_signed_output : output, - a->info()->dimension(0), - _a_offset, _b_offset, info.gemmlowp_output_stage()); + _offset_contribution_output_stage_kernel = arm_compute::support::cpp14::make_unique(); + _offset_contribution_output_stage_kernel->configure(&_mm_result_s32, + _a_offset == 0 ? nullptr : &_vector_sum_col, + _b_offset == 0 ? nullptr : &_vector_sum_row, c, + _flip_signedness ? 
&_signed_output : output, + a->info()->dimension(0), + _a_offset, _b_offset, info.gemmlowp_output_stage()); if(_flip_signedness) { - _convert_from_signed_asymm.configure(&_signed_output, output); + _convert_from_signed_asymm = arm_compute::support::cpp14::make_unique(); + _convert_from_signed_asymm->configure(&_signed_output, output); } } else @@ -216,10 +235,12 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, // Configure matrix multiply kernel if(!_assembly_path) { - _mm_kernel.configure(matrix_a, matrix_b, output); + _mm_kernel = arm_compute::support::cpp14::make_unique(); + _mm_kernel->configure(matrix_a, matrix_b, output); } // Configure offset contribution kernel - _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset); + _offset_contribution_kernel = arm_compute::support::cpp14::make_unique(); + _offset_contribution_kernel->configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? 
nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset); } // Configure activation @@ -468,7 +489,7 @@ void NEGEMMLowpMatrixMultiplyCore::run() // Convert QASYMM8->QASYMM8_SIGNED if(_flip_signedness) { - NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY); + NEScheduler::get().schedule(_convert_to_signed_asymm.get(), Window::DimY); } // Run GEMM @@ -481,15 +502,15 @@ void NEGEMMLowpMatrixMultiplyCore::run() if(!_run_vector_matrix_multiplication) { // Run interleave kernel - NEScheduler::get().schedule(&_mtx_a_reshape_kernel, Window::DimY); + NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY); if(!_reshape_b_only_on_first_run) { // Run transpose kernel - NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY); + NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); } } - NEScheduler::get().schedule(&_mm_kernel, Window::DimY); + NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY); } if(!_fused_assembly_path) @@ -497,31 +518,31 @@ void NEGEMMLowpMatrixMultiplyCore::run() // Run matrix A reduction kernel only if _b_offset is not equal to 0 if(_b_offset != 0) { - NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX); + NEScheduler::get().schedule(_mtx_a_reduction_kernel.get(), Window::DimX); } // Run matrix B reduction kernel only if _a_offset is not equal to 0 if(_a_offset != 0 && !_reshape_b_only_on_first_run) { - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); + NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX); } if(_fuse_output_stage) { // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY); + NEScheduler::get().schedule(_offset_contribution_output_stage_kernel.get(), Window::DimY); } else { // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY); + 
NEScheduler::get().schedule(_offset_contribution_kernel.get(), Window::DimY); } } // Convert QASYMM8_SIGNED->QASYMM8 - if(_flip_signedness) + if(!_fused_assembly_path && _fuse_output_stage && _flip_signedness) { - NEScheduler::get().schedule(&_convert_from_signed_asymm, Window::DimY); + NEScheduler::get().schedule(_convert_from_signed_asymm.get(), Window::DimY); } // Run fused activation unless already run in the fused assembly @@ -560,7 +581,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() // Run reshape kernel and mark original weights tensor as unused _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY); + NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); @@ -571,7 +592,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() if(!_fused_assembly_path && _a_offset != 0 && _reshape_b_only_on_first_run) { _vector_sum_col.allocator()->allocate(); - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); + NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX); } _is_prepared = true; diff --git a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp index 239a8e668a..9fb8851d7a 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp @@ -24,15 +24,17 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" #include "arm_compute/core/Validate.h" 
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" #include "support/MemorySupport.h" namespace arm_compute { +NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default; + void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min, int max) { @@ -46,6 +48,8 @@ Status NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(const ITens return NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, min, max); } +NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default; + void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min, int max) { @@ -59,6 +63,8 @@ Status NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::validate(const ITenso return NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, min, max); } +NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default; + void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min, int max) { auto k = arm_compute::support::cpp14::make_unique(); @@ -71,6 +77,8 @@ Status 
NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(const ITens return NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, min, max); } +NEGEMMLowpOutputStage::~NEGEMMLowpOutputStage() = default; + void NEGEMMLowpOutputStage::configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info) { // Perform validate step diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp index e807e86299..90cf0bab07 100644 --- a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp +++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp @@ -25,9 +25,9 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGather.cpp b/src/runtime/NEON/functions/NEGather.cpp index 5238936015..5c0dae1507 100644 --- a/src/runtime/NEON/functions/NEGather.cpp +++ b/src/runtime/NEON/functions/NEGather.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEGather.h" -#include "arm_compute/core/NEON/kernels/NEGatherKernel.h" +#include "src/core/NEON/kernels/NEGatherKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEGaussian3x3.cpp b/src/runtime/NEON/functions/NEGaussian3x3.cpp index fba49ede2a..5290de1348 100644 --- a/src/runtime/NEON/functions/NEGaussian3x3.cpp +++ b/src/runtime/NEON/functions/NEGaussian3x3.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" -#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include 
"src/core/NEON/kernels/NEGaussian3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEGaussian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp index 99591f4107..7857710462 100644 --- a/src/runtime/NEON/functions/NEGaussian5x5.cpp +++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,17 @@ #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEGaussian5x5::~NEGaussian5x5() = default; NEGaussian5x5::NEGaussian5x5(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _tmp(), _border_handler() @@ -46,21 +50,26 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border // Manage intermediate buffers _memory_group.manage(&_tmp); + _kernel_hor = arm_compute::support::cpp14::make_unique(); + _kernel_vert = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + // Create and configure kernels for the two passes - _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); - _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); + _kernel_hor->configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); + _kernel_vert->configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); _tmp.allocator()->allocate(); - _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void NEGaussian5x5::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - 
NEScheduler::get().schedule(&_kernel_hor, Window::DimY); - NEScheduler::get().schedule(&_kernel_vert, Window::DimY); + NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp index e4e20e041b..30fe70f0ab 100644 --- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp +++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp @@ -25,16 +25,18 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/runtime/Pyramid.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" +#include "support/MemorySupport.h" #include @@ -45,6 +47,8 @@ NEGaussianPyramid::NEGaussianPyramid() { } +NEGaussianPyramidHalf::~NEGaussianPyramidHalf() = default; + NEGaussianPyramidHalf::NEGaussianPyramidHalf() // NOLINT : _horizontal_border_handler(), _vertical_border_handler(), @@ -94,16 +98,20 @@ void NEGaussianPyramidHalf::configure(const ITensor *input, IPyramid *pyramid, B for(size_t i = 0; i < num_stages; ++i) { /* Configure horizontal kernel */ - _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i)); + _horizontal_reduction[i] = arm_compute::support::cpp14::make_unique(); 
+ _horizontal_reduction[i]->configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i)); /* Configure vertical kernel */ - _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1)); + _vertical_reduction[i] = arm_compute::support::cpp14::make_unique(); + _vertical_reduction[i]->configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1)); /* Configure border */ - _horizontal_border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value)); + _horizontal_border_handler[i] = arm_compute::support::cpp14::make_unique(); + _horizontal_border_handler[i]->configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i]->border_size(), border_mode, PixelValue(constant_border_value)); /* Configure border */ - _vertical_border_handler[i].configure(_tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16)); + _vertical_border_handler[i] = arm_compute::support::cpp14::make_unique(); + _vertical_border_handler[i]->configure(_tmp.get_pyramid_level(i), _vertical_reduction[i]->border_size(), border_mode, PixelValue(pixel_value_u16)); } _tmp.allocate(); @@ -122,13 +130,15 @@ void NEGaussianPyramidHalf::run() for(unsigned int i = 0; i < num_levels - 1; ++i) { - NEScheduler::get().schedule(&_horizontal_border_handler[i], Window::DimZ); - NEScheduler::get().schedule(&_horizontal_reduction[i], Window::DimY); - NEScheduler::get().schedule(&_vertical_border_handler[i], Window::DimZ); - NEScheduler::get().schedule(&_vertical_reduction[i], Window::DimY); + NEScheduler::get().schedule(_horizontal_border_handler[i].get(), Window::DimZ); + NEScheduler::get().schedule(_horizontal_reduction[i].get(), Window::DimY); + NEScheduler::get().schedule(_vertical_border_handler[i].get(), Window::DimZ); + NEScheduler::get().schedule(_vertical_reduction[i].get(), Window::DimY); } } 
+NEGaussianPyramidOrb::~NEGaussianPyramidOrb() = default; + NEGaussianPyramidOrb::NEGaussianPyramidOrb() // NOLINT : _gaus5x5(), _scale_nearest() diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 13210a06cd..d9a498e4bd 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -25,19 +25,22 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" #include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr memory_manager) : _memory_group(memory_manager), - _permute_deltas_kernel(), + _permute_deltas(), _flatten_deltas(), - _permute_scores_kernel(), + _permute_scores(), _flatten_scores(), - _compute_anchors_kernel(), - _bounding_box_kernel(), - _pad_kernel(), + _compute_anchors(), + _bounding_box(), + _pad(), _dequantize_anchors(), _dequantize_deltas(), _quantize_all_proposals(), @@ -62,6 +65,8 @@ NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptrinit(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info())); @@ -95,7 +100,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d if(!_is_nhwc) { _memory_group.manage(&_deltas_permuted); - _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); + _permute_deltas.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); _flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened); _deltas_permuted.allocator()->allocate(); } @@ -112,7 +117,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d if(!_is_nhwc) { 
_memory_group.manage(&_scores_permuted); - _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); + _permute_scores.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); _flatten_scores.configure(&_scores_permuted, &_scores_flattened); _scores_permuted.allocator()->allocate(); } @@ -141,7 +146,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d // Bounding box transform _memory_group.manage(&_all_proposals); BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f); - _bounding_box_kernel.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info); + _bounding_box.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info); deltas_to_use->allocator()->allocate(); anchors_to_use->allocator()->allocate(); @@ -197,7 +202,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d _scores_flattened.allocator()->allocate(); // Add the first column that represents the batch id. 
This will be all zeros, as we don't support multiple images - _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); + _pad.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); _proposals_4_roi_values.allocator()->allocate(); } @@ -229,7 +234,7 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens } TensorInfo all_anchors_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()))); + ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchors::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()))); TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true); TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true); @@ -240,8 +245,8 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 })); - ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 })); } TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); @@ -258,25 +263,25 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens if(is_qasymm8) { TensorInfo 
all_anchors_f32_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&all_anchors_info, &all_anchors_f32_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&all_anchors_info, &all_anchors_f32_info)); TensorInfo deltas_flattened_f32_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&deltas_flattened_info, &deltas_flattened_f32_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info)); TensorInfo proposals_4_roi_values_f32(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info, - BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); + ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info, + BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); - ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayerKernel::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized)); + ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized)); proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized; } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, - BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); + 
ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, + BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); } - ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayerKernel::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayer::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } })); if(num_valid_proposals->total_size() > 0) { @@ -319,13 +324,13 @@ void NEGenerateProposalsLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); // Compute all the anchors - NEScheduler::get().schedule(&_compute_anchors_kernel, Window::DimY); + _compute_anchors.run(); // Transpose and reshape the inputs if(!_is_nhwc) { - NEScheduler::get().schedule(&_permute_deltas_kernel, Window::DimY); - NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY); + _permute_deltas.run(); + _permute_scores.run(); } _flatten_deltas.run(); @@ -333,22 +338,22 @@ void NEGenerateProposalsLayer::run() if(_is_qasymm8) { - NEScheduler::get().schedule(&_dequantize_anchors, Window::DimY); - NEScheduler::get().schedule(&_dequantize_deltas, Window::DimY); + _dequantize_anchors.run(); + _dequantize_deltas.run(); } // Build the boxes - NEScheduler::get().schedule(&_bounding_box_kernel, Window::DimY); + _bounding_box.run(); if(_is_qasymm8) { - NEScheduler::get().schedule(&_quantize_all_proposals, Window::DimY); + _quantize_all_proposals.run(); } // Non maxima suppression _cpp_nms.run(); // Add dummy batch indexes - NEScheduler::get().schedule(&_pad_kernel, Window::DimY); + _pad.run(); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGDescriptor.cpp b/src/runtime/NEON/functions/NEHOGDescriptor.cpp index 10765f9b86..689e64fae7 100644 --- a/src/runtime/NEON/functions/NEHOGDescriptor.cpp +++ b/src/runtime/NEON/functions/NEHOGDescriptor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,8 +28,14 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGDescriptor::~NEHOGDescriptor() = default; NEHOGDescriptor::NEHOGDescriptor(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space() @@ -82,10 +88,12 @@ void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog _memory_group.manage(&_hog_space); // Initialise orientation binning kernel - _orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info()); + _orient_bin = arm_compute::support::cpp14::make_unique(); + _orient_bin->configure(&_mag, &_phase, &_hog_space, hog->info()); // Initialize HOG norm kernel - _block_norm.configure(&_hog_space, output, hog->info()); + _block_norm = arm_compute::support::cpp14::make_unique(); + _block_norm->configure(&_hog_space, output, hog->info()); // Allocate intermediate tensors _mag.allocator()->allocate(); @@ -101,8 +109,9 @@ void NEHOGDescriptor::run() _gradient.run(); // Run orientation binning kernel - NEScheduler::get().schedule(&_orient_bin, Window::DimY); + NEScheduler::get().schedule(_orient_bin.get(), Window::DimY); // Run block normalization kernel - NEScheduler::get().schedule(&_block_norm, Window::DimY); + NEScheduler::get().schedule(_block_norm.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGDetector.cpp b/src/runtime/NEON/functions/NEHOGDetector.cpp index 21db5f83b7..8468b75f4e 100644 --- a/src/runtime/NEON/functions/NEHOGDetector.cpp +++ 
b/src/runtime/NEON/functions/NEHOGDetector.cpp @@ -23,10 +23,12 @@ */ #include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGDetector::~NEHOGDetector() = default; void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class) { @@ -34,3 +36,4 @@ void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionW k->configure(input, hog, detection_windows, detection_window_stride, threshold, idx_class); _kernel = std::move(k); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGGradient.cpp b/src/runtime/NEON/functions/NEHOGGradient.cpp index 8f3559a7ed..7d794bc1a0 100644 --- a/src/runtime/NEON/functions/NEHOGGradient.cpp +++ b/src/runtime/NEON/functions/NEHOGGradient.cpp @@ -23,12 +23,16 @@ */ #include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGGradient::~NEHOGGradient() = default; NEHOGGradient::NEHOGGradient(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -88,3 +92,4 @@ void NEHOGGradient::run() // Run magnitude/phase kernel NEScheduler::get().schedule(_mag_phase.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp 
b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp index e08b699e1c..3e41faad43 100644 --- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp +++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp @@ -28,8 +28,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Tensor.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGMultiDetection::~NEHOGMultiDetection() = default; NEHOGMultiDetection::NEHOGMultiDetection(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -262,3 +267,4 @@ void NEHOGMultiDetection::run() NEScheduler::get().schedule(&_non_maxima_kernel, Window::DimY); } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp index 3c51eb2249..23fcf8c805 100644 --- a/src/runtime/NEON/functions/NEHarrisCorners.cpp +++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp @@ -24,8 +24,6 @@ #include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Array.h" @@ -34,12 +32,19 @@ #include "arm_compute/runtime/NEON/functions/NESobel5x5.h" #include "arm_compute/runtime/NEON/functions/NESobel7x7.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" #include 
"support/MemorySupport.h" #include #include -using namespace arm_compute; +namespace arm_compute +{ +NEHarrisCorners::~NEHarrisCorners() = default; NEHarrisCorners::NEHarrisCorners(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -154,8 +159,10 @@ void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist, } // Configure border filling before harris score - _border_gx.configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value); - _border_gy.configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value); + _border_gx = arm_compute::support::cpp14::make_unique(); + _border_gy = arm_compute::support::cpp14::make_unique(); + _border_gx->configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value); + _border_gy->configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value); // Allocate once all the configure methods have been called _gx.allocator()->allocate(); @@ -193,8 +200,8 @@ void NEHarrisCorners::run() _sobel->run(); // Fill border before harris score kernel - NEScheduler::get().schedule(&_border_gx, Window::DimZ); - NEScheduler::get().schedule(&_border_gy, Window::DimZ); + NEScheduler::get().schedule(_border_gx.get(), Window::DimZ); + NEScheduler::get().schedule(_border_gy.get(), Window::DimZ); // Run harris score kernel NEScheduler::get().schedule(_harris_score.get(), Window::DimY); @@ -208,3 +215,4 @@ void NEHarrisCorners::run() // Run sort & euclidean distance NEScheduler::get().schedule(&_sort_euclidean, Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHistogram.cpp b/src/runtime/NEON/functions/NEHistogram.cpp index 39fad977af..40ea3a16c6 100644 --- a/src/runtime/NEON/functions/NEHistogram.cpp +++ b/src/runtime/NEON/functions/NEHistogram.cpp @@ -29,8 +29,12 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include 
"arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHistogram::~NEHistogram() = default; NEHistogram::NEHistogram() : _histogram_kernel(), _local_hist(), _window_lut(window_lut_default_size), _local_hist_size(0) @@ -47,11 +51,13 @@ void NEHistogram::configure(const IImage *input, IDistribution1D *output) _local_hist.resize(_local_hist_size); // Configure kernel - _histogram_kernel.configure(input, output, _local_hist.data(), _window_lut.data()); + _histogram_kernel = arm_compute::support::cpp14::make_unique(); + _histogram_kernel->configure(input, output, _local_hist.data(), _window_lut.data()); } void NEHistogram::run() { // Calculate histogram of input. - NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEIm2Col.cpp b/src/runtime/NEON/functions/NEIm2Col.cpp index 99e5d3f1df..bc0c60112e 100644 --- a/src/runtime/NEON/functions/NEIm2Col.cpp +++ b/src/runtime/NEON/functions/NEIm2Col.cpp @@ -25,9 +25,13 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEIm2Col::~NEIm2Col() = default; + NEIm2Col::NEIm2Col() : _kernel(), _y_dim(1) { @@ -37,7 +41,8 @@ void NEIm2Col::configure(const ITensor *input, ITensor *output, const Size2D &ke { _y_dim = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT); - _kernel.configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups); } Status NEIm2Col::validate(const ITensorInfo *input, const 
ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, @@ -48,6 +53,6 @@ Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, c void NEIm2Col::run() { - NEScheduler::get().schedule(&_kernel, _y_dim); + NEScheduler::get().schedule(_kernel.get(), _y_dim); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp b/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp index 57d01ff2d6..e3fb284796 100644 --- a/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp @@ -26,9 +26,13 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEInstanceNormalizationLayer::~NEInstanceNormalizationLayer() = default; + NEInstanceNormalizationLayer::NEInstanceNormalizationLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), _permute_input(), _permute_output(), _permuted_input(), _permuted_output() { @@ -42,6 +46,8 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl // Configure Kernels _is_nchw = data_layout == DataLayout::NCHW; + _normalization_kernel = arm_compute::support::cpp14::make_unique(); + if(!_is_nchw) { _memory_group.manage(&_permuted_input); @@ -51,7 +57,7 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U)); _permuted_input.info()->set_data_layout(DataLayout::NCHW); - _normalization_kernel.configure(&_permuted_input, &_permuted_output, kernel_descriptor); + _normalization_kernel->configure(&_permuted_input, &_permuted_output, 
kernel_descriptor); _permuted_output.info()->set_data_layout(DataLayout::NCHW); _permute_output.configure(&_permuted_output, output != nullptr ? output : input, PermutationVector(2U, 0U, 1U)); @@ -60,7 +66,7 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl } else { - _normalization_kernel.configure(input, output, kernel_descriptor); + _normalization_kernel->configure(input, output, kernel_descriptor); } } @@ -81,7 +87,7 @@ void NEInstanceNormalizationLayer::run() _permute_input.run(); } - NEScheduler::get().schedule(&_normalization_kernel, Window::DimZ); + NEScheduler::get().schedule(_normalization_kernel.get(), Window::DimZ); // Permute output if(!_is_nchw) diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp index 8ab6bbd76d..63bcd53373 100644 --- a/src/runtime/NEON/functions/NEIntegralImage.cpp +++ b/src/runtime/NEON/functions/NEIntegralImage.cpp @@ -23,18 +23,25 @@ */ #include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" -#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEIntegralImageKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; +namespace arm_compute +{ +NEIntegralImage::~NEIntegralImage() = default; void NEIntegralImage::configure(const ITensor *input, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output); _kernel = std::move(k); - _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue()); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue()); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp 
index 04cf3a233a..4a99968cc3 100644 --- a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp +++ b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -32,6 +35,7 @@ namespace { constexpr int max_input_tensor_dim = 3; } // namespace +NEL2NormalizeLayer::~NEL2NormalizeLayer() = default; NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq() @@ -46,7 +50,8 @@ void NEL2NormalizeLayer::configure(ITensor *input, ITensor *output, int axis, fl // Configure Kernels const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); _reduce_func.configure(input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE); - _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon); + _normalize_kernel = arm_compute::support::cpp14::make_unique(); + _normalize_kernel->configure(input, &_sumsq, output, axis, epsilon); // Allocate intermediate tensors _sumsq.allocator()->allocate(); @@ -78,6 +83,6 @@ void NEL2NormalizeLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); _reduce_func.run(); - NEScheduler::get().schedule(&_normalize_kernel, Window::DimY); + NEScheduler::get().schedule(_normalize_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index dca274acd2..48d69bd6fc 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -29,12 +29,24 @@ #include 
"arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/common/LSTMParams.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" namespace arm_compute { using namespace arm_compute::misc::shape_calculator; using namespace arm_compute::utils::info_helpers; +NELSTMLayer::~NELSTMLayer() = default; + NELSTMLayer::NELSTMLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(), _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(), @@ -575,8 +587,8 @@ Status NELSTMLayer::validate(const ITensorInfo *input, } // Validate copy kernel - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&cell_state_tmp, cell_state_out)); - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(&cell_state_tmp, cell_state_out)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output)); // Validate scratch concatenation std::vector inputs_vector_info_raw; @@ -646,7 +658,7 @@ void NELSTMLayer::run() } 
_fully_connected_cell_state.run(); - NEScheduler::get().schedule(&_transpose_cell_state, Window::DimY); + _transpose_cell_state.run(); _gemm_cell_state1.run(); _accum_cell_state1.run(); if(_is_layer_norm_lstm) @@ -691,8 +703,8 @@ void NELSTMLayer::run() } } - NEScheduler::get().schedule(&_copy_cell_state, Window::DimY); - NEScheduler::get().schedule(&_copy_output, Window::DimY); + _copy_cell_state.run(); + _copy_output.run(); _concat_scratch_buffer.run(); } diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp index 7610d15787..e43929390e 100644 --- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp +++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp @@ -26,6 +26,16 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include @@ -42,6 +52,7 @@ const QuantizationInfo qsymm_3(8.f / 32768.f, 0); // qsymm16 with 3 integer bit const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // qsymm16 with 4 integer bit const QuantizationInfo qsymm_0(1.f / 32768.f, 0); // qsymm16 with 0 integer bit } // namespace +NELSTMLayerQuantized::~NELSTMLayerQuantized() = default; 
NELSTMLayerQuantized::NELSTMLayerQuantized(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(), diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp index 4f0639b64b..a2651dbf36 100644 --- a/src/runtime/NEON/functions/NELaplacianPyramid.cpp +++ b/src/runtime/NEON/functions/NELaplacianPyramid.cpp @@ -29,11 +29,15 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" #include "arm_compute/runtime/Tensor.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" -using namespace arm_compute; +namespace arm_compute +{ +NELaplacianPyramid::~NELaplacianPyramid() = default; NELaplacianPyramid::NELaplacianPyramid() // NOLINT : _num_levels(0), @@ -105,3 +109,4 @@ void NELaplacianPyramid::configure(const ITensor *input, IPyramid *pyramid, ITen _gauss_pyr.allocate(); _conv_pyr.allocate(); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp index aa5f8a21ca..a50e7ccbef 100644 --- a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp +++ b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/IPyramid.h" #include "arm_compute/core/ITensor.h" @@ -31,7 +32,9 @@ #include -using namespace 
arm_compute; +namespace arm_compute +{ +NELaplacianReconstruct::~NELaplacianReconstruct() = default; NELaplacianReconstruct::NELaplacianReconstruct() // NOLINT : _tmp_pyr(), @@ -100,3 +103,4 @@ void NELaplacianReconstruct::run() _depthf.run(); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp index af502be1e9..131ac82ba8 100644 --- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,12 +27,16 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "support/MemorySupport.h" #include #include -using namespace arm_compute; - +namespace arm_compute +{ namespace { void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, @@ -70,9 +74,10 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons shape_gemm.set(1, mat_input_rows); } } // namespace +NELocallyConnectedLayer::~NELocallyConnectedLayer() = default; NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), + : _memory_group(std::move(memory_manager)), _input_im2col(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), 
_is_prepared(false), _original_weights(nullptr) { } @@ -113,10 +118,10 @@ Status NELocallyConnectedLayer::validate(const ITensorInfo *input, const ITensor TensorInfo input_im2col_reshaped_info(shape_im2col, 1, input->data_type()); TensorInfo gemm_output_info(shape_gemm, 1, input->data_type()); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2Col::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias)); ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info)); ARM_COMPUTE_RETURN_ON_ERROR(NELocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h))); + ARM_COMPUTE_RETURN_ON_ERROR(NECol2Im::validate(&gemm_output_info, output, Size2D(conv_w, conv_h))); return Status{}; } @@ -154,10 +159,12 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei _memory_group.manage(&_gemm_output); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); - _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); - _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); - _output_col2im_kernel.configure(&_gemm_output, output, Size2D(conv_w, conv_h)); + _input_im2col.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); + _weights_reshape_kernel = arm_compute::support::cpp14::make_unique(); + _weights_reshape_kernel->configure(weights, biases, &_weights_reshaped); + _mm_kernel = arm_compute::support::cpp14::make_unique(); + _mm_kernel->configure(&_input_im2col_reshaped, &_weights_reshaped, 
&_gemm_output); + _output_col2im.configure(&_gemm_output, output, Size2D(conv_w, conv_h)); // Allocate intermediate tensors _input_im2col_reshaped.allocator()->allocate(); @@ -171,13 +178,13 @@ void NELocallyConnectedLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); // Run input reshaping - NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY); + _input_im2col.run(); // Runs GEMM on reshaped matrices - NEScheduler::get().schedule(&_mm_kernel, Window::DimX); + NEScheduler::get().schedule(_mm_kernel.get(), Window::DimX); // Reshape output matrix - NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY); + _output_col2im.run(); } void NELocallyConnectedLayer::prepare() @@ -188,9 +195,10 @@ void NELocallyConnectedLayer::prepare() // Run weights reshaping and mark original weights tensor as unused _weights_reshaped.allocator()->allocate(); - NEScheduler::get().schedule(&_weights_reshape_kernel, 3); + NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3); _original_weights->mark_as_unused(); _is_prepared = true; } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEMagnitude.cpp b/src/runtime/NEON/functions/NEMagnitude.cpp index 5ca672e1d6..06ed8d46c9 100644 --- a/src/runtime/NEON/functions/NEMagnitude.cpp +++ b/src/runtime/NEON/functions/NEMagnitude.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NEMagnitude.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; +namespace arm_compute +{ +NEMagnitude::~NEMagnitude() = default; void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type) { @@ -46,3 +48,4 @@ void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITenso _kernel = std::move(k); } } +} // namespace arm_compute diff --git 
a/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp b/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp index 9d3f34fba4..e8c9d09d95 100644 --- a/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp @@ -25,9 +25,14 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEMaxUnpoolingLayer::~NEMaxUnpoolingLayer() = default; + NEMaxUnpoolingLayer::NEMaxUnpoolingLayer() : _memset_kernel(), _unpooling_layer_kernel() @@ -37,8 +42,10 @@ NEMaxUnpoolingLayer::NEMaxUnpoolingLayer() void NEMaxUnpoolingLayer::configure(ITensor *input, ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info) { const PixelValue zero_value(0.f); - _memset_kernel.configure(output, zero_value); - _unpooling_layer_kernel.configure(input, indices, output, pool_info); + _memset_kernel = arm_compute::support::cpp14::make_unique(); + _unpooling_layer_kernel = arm_compute::support::cpp14::make_unique(); + _memset_kernel->configure(output, zero_value); + _unpooling_layer_kernel->configure(input, indices, output, pool_info); } Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info) @@ -48,7 +55,7 @@ Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo void NEMaxUnpoolingLayer::run() { - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); - NEScheduler::get().schedule(&_unpooling_layer_kernel, Window::DimY); + NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY); + NEScheduler::get().schedule(_unpooling_layer_kernel.get(), Window::DimY); } } /* namespace arm_compute */ diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp index 
57363f05ff..e073420114 100644 --- a/src/runtime/NEON/functions/NEMeanStdDev.cpp +++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,13 @@ #include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEMeanStdDev::~NEMeanStdDev() = default; NEMeanStdDev::NEMeanStdDev() : _mean_stddev_kernel(), _fill_border_kernel(), _global_sum(0), _global_sum_squared(0) @@ -34,8 +39,11 @@ NEMeanStdDev::NEMeanStdDev() void NEMeanStdDev::configure(IImage *input, float *mean, float *stddev) { - _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); - _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0))); + _mean_stddev_kernel = arm_compute::support::cpp14::make_unique(); + _fill_border_kernel = arm_compute::support::cpp14::make_unique(); + + _mean_stddev_kernel->configure(input, mean, &_global_sum, stddev, &_global_sum_squared); + _fill_border_kernel->configure(input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0))); } void NEMeanStdDev::run() @@ -43,6 +51,7 @@ void NEMeanStdDev::run() _global_sum = 0; _global_sum_squared = 0; - NEScheduler::get().schedule(&_fill_border_kernel, Window::DimZ); - NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY); + NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimZ); + NEScheduler::get().schedule(_mean_stddev_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp 
b/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp index a88732b67d..d128c4456a 100644 --- a/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" #include "support/MemorySupport.h" namespace arm_compute { +NEMeanStdDevNormalizationLayer::~NEMeanStdDevNormalizationLayer() = default; + void NEMeanStdDevNormalizationLayer::configure(ITensor *input, ITensor *output, float epsilon) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NEMedian3x3.cpp b/src/runtime/NEON/functions/NEMedian3x3.cpp index 2bbe8d39ae..b7b7c2cb47 100644 --- a/src/runtime/NEON/functions/NEMedian3x3.cpp +++ b/src/runtime/NEON/functions/NEMedian3x3.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" -#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEMedian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No 
newline at end of file diff --git a/src/runtime/NEON/functions/NEMinMaxLocation.cpp b/src/runtime/NEON/functions/NEMinMaxLocation.cpp index ca63937770..3c2219ca07 100644 --- a/src/runtime/NEON/functions/NEMinMaxLocation.cpp +++ b/src/runtime/NEON/functions/NEMinMaxLocation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,12 @@ #include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEMinMaxLocation::~NEMinMaxLocation() = default; NEMinMaxLocation::NEMinMaxLocation() : _min_max(), _min_max_loc() @@ -34,17 +38,21 @@ NEMinMaxLocation::NEMinMaxLocation() void NEMinMaxLocation::configure(const IImage *input, void *min, void *max, ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count) { - _min_max.configure(input, min, max); - _min_max_loc.configure(input, min, max, min_loc, max_loc, min_count, max_count); + _min_max = arm_compute::support::cpp14::make_unique(); + _min_max->configure(input, min, max); + + _min_max_loc = arm_compute::support::cpp14::make_unique(); + _min_max_loc->configure(input, min, max, min_loc, max_loc, min_count, max_count); } void NEMinMaxLocation::run() { - _min_max.reset(); + _min_max->reset(); /* Run min max kernel */ - NEScheduler::get().schedule(&_min_max, Window::DimY); + NEScheduler::get().schedule(_min_max.get(), Window::DimY); /* Run min max location */ - NEScheduler::get().schedule(&_min_max_loc, Window::DimY); + NEScheduler::get().schedule(_min_max_loc.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NENonLinearFilter.cpp b/src/runtime/NEON/functions/NENonLinearFilter.cpp index b7c72acb9a..4d8fd00cbd 100644 --- 
a/src/runtime/NEON/functions/NENonLinearFilter.cpp +++ b/src/runtime/NEON/functions/NENonLinearFilter.cpp @@ -23,14 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" -#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value) @@ -38,5 +39,9 @@ void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilt auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp index 4d9edf7fc7..b8f5c251b7 100644 --- a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp +++ b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp @@ -23,25 +23,29 @@ */ #include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace 
arm_compute; - +namespace arm_compute +{ void NENonMaximaSuppression3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); + auto b = arm_compute::support::cpp14::make_unique(); if(border_mode != BorderMode::UNDEFINED) { - _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast(0.f)); + b->configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast(0.f)); } else { - _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast(0.f)); + b->configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast(0.f)); } + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp index 10ee938335..dfc73b2a57 100644 --- a/src/runtime/NEON/functions/NENormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp @@ -29,9 +29,13 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NENormalizationLayer::~NENormalizationLayer() = default; + NENormalizationLayer::NENormalizationLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_f(), _input_squared() { @@ -48,7 +52,8 @@ void NENormalizationLayer::configure(const ITensor *input, ITensor *output, cons _memory_group.manage(&_input_squared); // Configure kernels - _norm_kernel.configure(input, &_input_squared, output, norm_info); + _norm_kernel = arm_compute::support::cpp14::make_unique(); + _norm_kernel->configure(input, &_input_squared, output, norm_info); _multiply_f.configure(input, input, 
&_input_squared, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); // Allocate the tensor once the configure methods have been called @@ -70,6 +75,6 @@ void NENormalizationLayer::run() { MemoryGroupResourceScope scope_mg(_memory_group); _multiply_f.run(); - NEScheduler::get().schedule(&_norm_kernel, Window::DimY); + NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY); } } \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEOpticalFlow.cpp b/src/runtime/NEON/functions/NEOpticalFlow.cpp index c9e07483e6..565346bfce 100644 --- a/src/runtime/NEON/functions/NEOpticalFlow.cpp +++ b/src/runtime/NEON/functions/NEOpticalFlow.cpp @@ -25,7 +25,6 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -33,8 +32,13 @@ #include "arm_compute/runtime/Pyramid.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEOpticalFlow::~NEOpticalFlow() = default; NEOpticalFlow::NEOpticalFlow(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -110,11 +114,12 @@ void NEOpticalFlow::configure(const Pyramid *old_pyramid, const Pyramid *new_pyr _func_scharr[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value); // Init Lucas-Kanade kernel - _kernel_tracker[i].configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i], - old_points, new_points_estimates, new_points, - &_old_points_internal, &_new_points_internal, - termination, use_initial_estimate, epsilon, num_iterations, window_dimension, - i, _num_levels, pyr_scale); + 
_kernel_tracker[i] = arm_compute::support::cpp14::make_unique(); + _kernel_tracker[i]->configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i], + old_points, new_points_estimates, new_points, + &_old_points_internal, &_new_points_internal, + termination, use_initial_estimate, epsilon, num_iterations, window_dimension, + i, _num_levels, pyr_scale); _scharr_gx[i].allocator()->allocate(); _scharr_gy[i].allocator()->allocate(); @@ -133,6 +138,7 @@ void NEOpticalFlow::run() _func_scharr[level - 1].run(); // Run Lucas-Kanade kernel - NEScheduler::get().schedule(&_kernel_tracker[level - 1], Window::DimX); + NEScheduler::get().schedule(_kernel_tracker[level - 1].get(), Window::DimX); } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp index f9393a4d92..00a1a4257a 100644 --- a/src/runtime/NEON/functions/NEPReluLayer.cpp +++ b/src/runtime/NEON/functions/NEPReluLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEPReluLayer.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" +#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp index 03c597a3bf..92659f39a2 100644 --- a/src/runtime/NEON/functions/NEPadLayer.cpp +++ b/src/runtime/NEON/functions/NEPadLayer.cpp @@ -27,7 +27,10 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -47,6 +50,8 @@ uint32_t last_padding_dimension(const PaddingList &padding) } } // namespace +NEPadLayer::~NEPadLayer() = default; + NEPadLayer::NEPadLayer() : 
_copy_kernel(), _pad_kernel(), _mode(), _padding(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results() { @@ -54,7 +59,8 @@ NEPadLayer::NEPadLayer() void NEPadLayer::configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value) { - _pad_kernel.configure(input, output, padding, constant_value, PaddingMode::CONSTANT); + _pad_kernel = arm_compute::support::cpp14::make_unique(); + _pad_kernel->configure(input, output, padding, constant_value, PaddingMode::CONSTANT); } void NEPadLayer::configure_reflect_symmetric_mode(ITensor *input, ITensor *output) @@ -195,7 +201,8 @@ void NEPadLayer::configure(ITensor *input, ITensor *output, const PaddingList &p else { // Copy the input to the whole output if no padding is applied - _copy_kernel.configure(input, output); + _copy_kernel = arm_compute::support::cpp14::make_unique(); + _copy_kernel->configure(input, output); } } @@ -251,7 +258,7 @@ void NEPadLayer::run() { case PaddingMode::CONSTANT: { - NEScheduler::get().schedule(&_pad_kernel, Window::DimZ); + NEScheduler::get().schedule(_pad_kernel.get(), Window::DimZ); break; } case PaddingMode::REFLECT: @@ -280,7 +287,7 @@ void NEPadLayer::run() } else { - NEScheduler::get().schedule(&_copy_kernel, Window::DimY); + NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPermute.cpp b/src/runtime/NEON/functions/NEPermute.cpp index 698add86b9..d2a115fdc8 100644 --- a/src/runtime/NEON/functions/NEPermute.cpp +++ b/src/runtime/NEON/functions/NEPermute.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" +#include "src/core/NEON/kernels/NEPermuteKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPhase.cpp b/src/runtime/NEON/functions/NEPhase.cpp index 
85779611cd..3b6182a269 100644 --- a/src/runtime/NEON/functions/NEPhase.cpp +++ b/src/runtime/NEON/functions/NEPhase.cpp @@ -23,13 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEPhase.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type) { if(phase_type == PhaseType::UNSIGNED) @@ -45,3 +45,4 @@ void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *o _kernel = std::move(k); } } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp index 4208878b75..f7f4437554 100644 --- a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp +++ b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp index 81bd00d44d..12ac8d6d7d 100644 --- a/src/runtime/NEON/functions/NEPoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp @@ -25,8 +25,13 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEPoolingLayer::~NEPoolingLayer() = default; 
NEPoolingLayer::NEPoolingLayer() : _pooling_layer_kernel(), _border_handler(), _is_global_pooling_layer(false), _data_layout(DataLayout::NCHW) @@ -42,7 +47,8 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout; // Configure pooling kernel - _pooling_layer_kernel.configure(input, output, pool_info, indices); + _pooling_layer_kernel = arm_compute::support::cpp14::make_unique(); + _pooling_layer_kernel->configure(input, output, pool_info, indices); switch(_data_layout) { @@ -55,7 +61,8 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay { zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info()); } - _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value); + _border_handler = arm_compute::support::cpp14::make_unique(); + _border_handler->configure(input, _pooling_layer_kernel->border_size(), border_mode, zero_value); break; } case DataLayout::NHWC: @@ -76,16 +83,18 @@ void NEPoolingLayer::run() { case DataLayout::NCHW: // Fill border - NEScheduler::get().schedule(&_border_handler, Window::DimY); + NEScheduler::get().schedule(_border_handler.get(), Window::DimY); // Run pooling layer - NEScheduler::get().schedule(&_pooling_layer_kernel, _is_global_pooling_layer ? Window::DimZ : Window::DimY); + NEScheduler::get().schedule(_pooling_layer_kernel.get(), _is_global_pooling_layer ? 
Window::DimZ : Window::DimY); break; case DataLayout::NHWC: // Run pooling layer - NEScheduler::get().schedule(&_pooling_layer_kernel, Window::DimX); + NEScheduler::get().schedule(_pooling_layer_kernel.get(), Window::DimX); break; default: ARM_COMPUTE_ERROR("Data layout not supported"); } } + +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp index bcf6bef9c7..bfa06da04e 100644 --- a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp +++ b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp index e41962451c..1013730235 100644 --- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp +++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp @@ -30,7 +30,16 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "src/core/helpers/WindowHelpers.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -47,6 +56,31 @@ Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info, const ITensorInfo 
*mm } } // namespace +Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) +{ + // Output quantization scale will be different, but ignored here + // since it will be configured at configure() stage. + const TensorInfo out + { + in + }; + return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); +} + +void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in) +{ + ARM_COMPUTE_ERROR_ON(!_has_layer_norm); + + Tensor &out = get_layer_norm_output(g); + _memory_group.manage(&out); + out.allocator()->init(*(in->info())); + + get_layer_norm(g) = arm_compute::support::cpp14::make_unique(); + get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g)); +} + +NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default; + Status NEQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst) { ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported); @@ -77,7 +111,21 @@ void NEQLSTMLayer::TensorCopyKernel::run() input_iter, output_iter); } +NEQLSTMLayer::~NEQLSTMLayer() = default; + NEQLSTMLayer::NEQLSTMLayer(std::shared_ptr memory_manager) + : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(), + _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(), + _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(), + _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), 
_pixelwise_mul_cell_to_forget(), + _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(), + _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(), + _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(), + _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(), + _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(), + _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(), + _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(), + _layer_norm_output() { _memory_group = MemoryGroup(std::move(memory_manager)); } @@ -178,18 +226,29 @@ void NEQLSTMLayer::configure(const ITensor *input, _input_to_input_weights = lstm_params.input_to_input_weights(); _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights(); - _input_to_input_reduction.configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_input_reduction.configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, 
-qoutput_state_in.offset, true)); + _input_to_input_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_input_reduction = arm_compute::support::cpp14::make_unique(); + _input_to_input_reduction->configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); } - _input_to_forget_reduction.configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_forget_reduction.configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); - _input_to_cell_reduction.configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_cell_reduction.configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); - _input_to_output_reduction.configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_output_reduction.configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + + _input_to_forget_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_forget_reduction = arm_compute::support::cpp14::make_unique(); + _input_to_cell_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_cell_reduction = arm_compute::support::cpp14::make_unique(); + _input_to_output_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_output_reduction = 
arm_compute::support::cpp14::make_unique(); + + _recurrent_to_cell_reduction->configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_forget_reduction->configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_cell_reduction->configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_cell_reduction->configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_output_reduction->configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_output_reduction->configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); if(_has_projection) { - _projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)); + _projection_reduction = arm_compute::support::cpp14::make_unique(); + _projection_reduction->configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)); if(_projection_bias != nullptr) { _projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE); @@ -878,7 +937,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out); } - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output)); return Status{}; } 
@@ -906,7 +965,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Forget), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Forget).get(), Window::DimY); } _forget_gate_sigmoid.run(); @@ -921,7 +980,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Cell), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Cell).get(), Window::DimY); } _cell_gate_tanh.run(); @@ -948,7 +1007,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Input), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Input).get(), Window::DimY); } _input_gate_sigmoid.run(); @@ -979,7 +1038,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Output), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Output).get(), Window::DimY); } _output_gate_sigmoid.run(); @@ -1021,7 +1080,7 @@ void NEQLSTMLayer::run() } // Copy output_state_out to output - NEScheduler::get().schedule(&_copy_output, Window::DimY); + _copy_output.run(); } void NEQLSTMLayer::prepare() @@ -1051,8 +1110,8 @@ void NEQLSTMLayer::prepare() { _input_to_input_eff_bias.allocator()->allocate(); _recurrent_to_input_eff_bias.allocator()->allocate(); - NEScheduler::get().schedule(&_input_to_input_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_input_reduction, Window::DimY); + NEScheduler::get().schedule(_input_to_input_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_input_reduction.get(), Window::DimY); _input_to_input_weights_transposed.allocator()->allocate(); _recurrent_to_input_weights_transposed.allocator()->allocate(); @@ -1067,17 +1126,17 @@ void NEQLSTMLayer::prepare() _recurrent_to_cell_eff_bias.allocator()->allocate(); 
_input_to_output_eff_bias.allocator()->allocate(); _recurrent_to_output_eff_bias.allocator()->allocate(); - NEScheduler::get().schedule(&_input_to_forget_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_forget_reduction, Window::DimY); - NEScheduler::get().schedule(&_input_to_cell_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_cell_reduction, Window::DimY); - NEScheduler::get().schedule(&_input_to_output_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_output_reduction, Window::DimY); + NEScheduler::get().schedule(_input_to_forget_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_forget_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_input_to_cell_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_cell_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_input_to_output_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_output_reduction.get(), Window::DimY); if(_has_projection) { _projection_eff_bias.allocator()->allocate(); - NEScheduler::get().schedule(&_projection_reduction, Window::DimY); + NEScheduler::get().schedule(_projection_reduction.get(), Window::DimY); if(_projection_bias != nullptr) { _projection_bias_add.run(); @@ -1106,5 +1165,4 @@ void NEQLSTMLayer::prepare() _is_prepared = true; } } - } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEQuantizationLayer.cpp b/src/runtime/NEON/functions/NEQuantizationLayer.cpp index c042705a72..a20ffb8858 100644 --- a/src/runtime/NEON/functions/NEQuantizationLayer.cpp +++ b/src/runtime/NEON/functions/NEQuantizationLayer.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp 
index b7415bd44c..a8e10482a7 100644 --- a/src/runtime/NEON/functions/NERNNLayer.cpp +++ b/src/runtime/NEON/functions/NERNNLayer.cpp @@ -30,9 +30,24 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NERNNLayer::~NERNNLayer() = default; + NERNNLayer::NERNNLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_f(), _activation(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), _is_prepared(false) @@ -99,7 +114,8 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I _activation.configure(&_add_output, hidden_state, info); _add_output.allocator()->allocate(); - _copy_kernel.configure(hidden_state, output); + _copy_kernel = arm_compute::support::cpp14::make_unique(); + _copy_kernel->configure(hidden_state, output); } void NERNNLayer::run() @@ -116,7 +132,7 @@ void NERNNLayer::run() _activation.run(); // copy hidden out to output - NEScheduler::get().schedule(&_copy_kernel, Window::DimY); + 
NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY); } void NERNNLayer::prepare() diff --git a/src/runtime/NEON/functions/NEROIAlignLayer.cpp b/src/runtime/NEON/functions/NEROIAlignLayer.cpp index a3b116a55e..a046140551 100644 --- a/src/runtime/NEON/functions/NEROIAlignLayer.cpp +++ b/src/runtime/NEON/functions/NEROIAlignLayer.cpp @@ -23,7 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h" -#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEROIPoolingLayer.cpp b/src/runtime/NEON/functions/NEROIPoolingLayer.cpp index 4aecadbc09..8bcf152881 100644 --- a/src/runtime/NEON/functions/NEROIPoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEROIPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,11 +24,14 @@ #include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEROIPoolingLayer::~NEROIPoolingLayer() = default; + NEROIPoolingLayer::NEROIPoolingLayer() : _roi_kernel() { @@ -36,11 +39,12 @@ NEROIPoolingLayer::NEROIPoolingLayer() void NEROIPoolingLayer::configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info) { - _roi_kernel.configure(input, rois, output, pool_info); + _roi_kernel = arm_compute::support::cpp14::make_unique(); + _roi_kernel->configure(input, rois, output, pool_info); } void NEROIPoolingLayer::run() { - NEScheduler::get().schedule(&_roi_kernel, Window::DimX); + NEScheduler::get().schedule(_roi_kernel.get(), Window::DimX); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NERange.cpp b/src/runtime/NEON/functions/NERange.cpp index 138b458fab..ba166b2d58 100644 --- a/src/runtime/NEON/functions/NERange.cpp +++ b/src/runtime/NEON/functions/NERange.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,13 @@ #include "arm_compute/runtime/NEON/functions/NERange.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NERangeKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NERange::~NERange() = default; + NERange::NERange() : _kernel() { @@ -34,7 +38,8 @@ NERange::NERange() void NERange::configure(ITensor *output, const float start, const float end, const float step) { - _kernel.configure(output, start, end, step); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(output, start, end, step); } Status NERange::validate(const ITensorInfo *output, const float start, const float end, const float step) @@ -44,6 +49,6 @@ Status NERange::validate(const ITensorInfo *output, const float start, const flo void NERange::run() { - NEScheduler::get().schedule(&_kernel, Window::DimX); + NEScheduler::get().schedule(_kernel.get(), Window::DimX); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp index c3c5529c09..b50a925f44 100644 --- a/src/runtime/NEON/functions/NEReduceMean.cpp +++ b/src/runtime/NEON/functions/NEReduceMean.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" namespace arm_compute @@ -96,6 +97,8 @@ Status validate_config(const ITensorInfo *input, const Coordinates &reduction_ax } } // namespace +NEReduceMean::~NEReduceMean() = default; + NEReduceMean::NEReduceMean(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _dequant(), _requant(), _reduction_ops(), _keep_dims(), _do_requant(), _input_no_quant(), _output_no_quant() diff --git 
a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index 4938a56b3f..463b65ec28 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -26,7 +26,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -54,6 +56,8 @@ size_t reduction_window_split_dimension(unsigned int axis) } } // namespace +NEReductionOperation::~NEReductionOperation() = default; + NEReductionOperation::NEReductionOperation(std::shared_ptr memory_manager) : _memory_group(memory_manager), _reduction_kernel(), _reshape(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false) { @@ -125,7 +129,8 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperation::validate(input->info(), output->info(), axis, op, keep_dims)); // Configure reduction kernel - _reduction_kernel.configure(input, output_internal, axis, op); + _reduction_kernel = arm_compute::support::cpp14::make_unique(); + _reduction_kernel->configure(input, output_internal, axis, op); _window_split = reduction_window_split_dimension(axis); _reduction_axis = axis; @@ -139,7 +144,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i void NEReductionOperation::run() { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_reduction_kernel, _window_split); + NEScheduler::get().schedule(_reduction_kernel.get(), _window_split); if(_is_reshape_required) { _reshape.run(); diff --git a/src/runtime/NEON/functions/NERemap.cpp b/src/runtime/NEON/functions/NERemap.cpp index d4e7f838c6..9276d49cf5 100644 --- 
a/src/runtime/NEON/functions/NERemap.cpp +++ b/src/runtime/NEON/functions/NERemap.cpp @@ -25,17 +25,18 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NERemapKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NERemapKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); @@ -45,9 +46,11 @@ void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported"); auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, map_x, map_y, output, policy); - _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReorgLayer.cpp b/src/runtime/NEON/functions/NEReorgLayer.cpp index dfe002a503..77ec7fbfb1 100644 --- a/src/runtime/NEON/functions/NEReorgLayer.cpp +++ b/src/runtime/NEON/functions/NEReorgLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEReorgLayer.h" -#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h" +#include "src/core/NEON/kernels/NEReorgLayerKernel.h" #include 
"support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index c1c88c1c7a..915d5d408f 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -23,10 +23,10 @@ */ #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Types.h" +#include "src/core/NEON/kernels/NEReshapeLayerKernel.h" #include "support/MemorySupport.h" #include @@ -35,6 +35,8 @@ namespace arm_compute { namespace experimental { +NEReshape::~NEReshape() = default; + void NEReshape::configure(const ITensorInfo *input, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NEReverse.cpp b/src/runtime/NEON/functions/NEReverse.cpp index c60c84e897..3ed0688386 100644 --- a/src/runtime/NEON/functions/NEReverse.cpp +++ b/src/runtime/NEON/functions/NEReverse.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEReverse.h" -#include "arm_compute/core/NEON/kernels/NEReverseKernel.h" +#include "src/core/NEON/kernels/NEReverseKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index bbf8343c2b..0290fe5a01 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -32,6 +32,7 @@ #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" #include "src/core/utils/ScaleUtils.h" diff --git a/src/runtime/NEON/functions/NEScharr3x3.cpp b/src/runtime/NEON/functions/NEScharr3x3.cpp index bf787e1440..cea0eefdb0 100644 --- 
a/src/runtime/NEON/functions/NEScharr3x3.cpp +++ b/src/runtime/NEON/functions/NEScharr3x3.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" -#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" #include "support/MemorySupport.h" #include @@ -36,5 +37,8 @@ void NEScharr3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } diff --git a/src/runtime/NEON/functions/NESelect.cpp b/src/runtime/NEON/functions/NESelect.cpp index 8def123c5d..0d1f490767 100644 --- a/src/runtime/NEON/functions/NESelect.cpp +++ b/src/runtime/NEON/functions/NESelect.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NESelect.h" -#include "arm_compute/core/NEON/kernels/NESelectKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NESelectKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NESlice.cpp b/src/runtime/NEON/functions/NESlice.cpp index 2bacf2ee2a..dd56eaba8b 100644 --- a/src/runtime/NEON/functions/NESlice.cpp +++ b/src/runtime/NEON/functions/NESlice.cpp @@ -24,10 +24,10 @@ #include "arm_compute/runtime/NEON/functions/NESlice.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include 
"arm_compute/core/utils/helpers/tensor_transform.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NESobel3x3.cpp b/src/runtime/NEON/functions/NESobel3x3.cpp index cfd68d70af..38d2dc227e 100644 --- a/src/runtime/NEON/functions/NESobel3x3.cpp +++ b/src/runtime/NEON/functions/NESobel3x3.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NESobel3x3.h" -#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NESobel3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp index 092c510bcf..e631fb3ed7 100644 --- a/src/runtime/NEON/functions/NESobel5x5.cpp +++ b/src/runtime/NEON/functions/NESobel5x5.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,8 +29,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NESobel5x5::~NESobel5x5() = default; NESobel5x5::NESobel5x5(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() @@ -46,14 +51,18 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16); + _sobel_hor = arm_compute::support::cpp14::make_unique(); + _sobel_vert = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + if(run_sobel_x && run_sobel_y) { _tmp_x.allocator()->init(tensor_info); _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); _tmp_y.allocator()->allocate(); } @@ -61,28 +70,29 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, { _tmp_x.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); - _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == 
BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); } else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); } - _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void NESobel5x5::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_sobel_hor, Window::DimY); - NEScheduler::get().schedule(&_sobel_vert, Window::DimY); + NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp index 87ec81f7b0..bc5f87c1ec 100644 --- a/src/runtime/NEON/functions/NESobel7x7.cpp +++ b/src/runtime/NEON/functions/NESobel7x7.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,8 +29,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NESobel7x7::~NESobel7x7() = default; NESobel7x7::NESobel7x7(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() @@ -45,6 +50,9 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, const bool run_sobel_y = output_y != nullptr; TensorInfo tensor_info(input->info()->tensor_shape(), Format::S32); + _sobel_hor = arm_compute::support::cpp14::make_unique(); + _sobel_vert = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); if(run_sobel_x && run_sobel_y) { @@ -52,8 +60,8 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); _tmp_y.allocator()->allocate(); } @@ -61,28 +69,29 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, { _tmp_x.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); - _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, 
border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); } else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); } - _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void NESobel7x7::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_sobel_hor, Window::DimY); - NEScheduler::get().schedule(&_sobel_vert, Window::DimY); + NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index 4f773861d2..e79ab0ee2d 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -24,13 +24,19 @@ #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include 
"arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "src/core/helpers/SoftmaxHelpers.h" +#include "support/MemorySupport.h" namespace arm_compute { +template +NESoftmaxLayerGeneric::~NESoftmaxLayerGeneric() = default; + template NESoftmaxLayerGeneric::NESoftmaxLayerGeneric(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_kernel(), _softmax_kernel(), _fill_border_kernel(), _max(), _tmp(), _input_permuted(), _output_permuted(), @@ -76,15 +82,17 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f _memory_group.manage(&_max); _memory_group.manage(&_tmp); - // Configure Kernels - _max_kernel.configure(tmp_input, &_max); + // Configure kernels + _max_kernel = arm_compute::support::cpp14::make_unique(); + _softmax_kernel = arm_compute::support::cpp14::make_unique>(); + _max_kernel->configure(tmp_input, &_max); if(_needs_permute) { // Add to the memory manager _output_permuted _memory_group.manage(&_output_permuted); // The normalization kernel stores the result in a permuted output tensor - _softmax_kernel.configure(tmp_input, &_max, &_output_permuted, beta, &_tmp); + _softmax_kernel->configure(tmp_input, &_max, &_output_permuted, beta, &_tmp); _input_permuted.allocator()->allocate(); // Re-permute the permuted output into the requested (4D) output @@ -96,8 +104,9 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f else { // Softmax 2D case - _fill_border_kernel.configure(tmp_input, _max_kernel.border_size(), BorderMode::REPLICATE); - _softmax_kernel.configure(tmp_input, &_max, output, beta, &_tmp); + _fill_border_kernel = arm_compute::support::cpp14::make_unique(); + _fill_border_kernel->configure(tmp_input, _max_kernel->border_size(), BorderMode::REPLICATE); + 
_softmax_kernel->configure(tmp_input, &_max, output, beta, &_tmp); } // Allocate intermediate buffers @@ -152,10 +161,13 @@ void NESoftmaxLayerGeneric::run() { _permute_input.run(); } + else + { + NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimY); + } - NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); - NEScheduler::get().schedule(&_max_kernel, Window::DimY); - NEScheduler::get().schedule(&_softmax_kernel, Window::DimY); + NEScheduler::get().schedule(_max_kernel.get(), Window::DimY); + NEScheduler::get().schedule(_softmax_kernel.get(), Window::DimY); if(_needs_permute) { diff --git a/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp b/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp index 97e793f6fb..516e8d604c 100644 --- a/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp +++ b/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,9 +29,14 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" +#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NESpaceToBatchLayer::~NESpaceToBatchLayer() = default; + NESpaceToBatchLayer::NESpaceToBatchLayer() : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false) { @@ -43,10 +48,12 @@ void NESpaceToBatchLayer::configure(const ITensor *input, const ITensor *block_s if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) { - _has_padding = true; - _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); + _has_padding = true; + _memset_kernel = arm_compute::support::cpp14::make_unique(); + _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), 
input->info()->quantization_info())); } - _space_to_batch_kernel.configure(input, block_shape, paddings, output); + _space_to_batch_kernel = arm_compute::support::cpp14::make_unique(); + _space_to_batch_kernel->configure(input, block_shape, paddings, output); } void NESpaceToBatchLayer::configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output) @@ -55,10 +62,12 @@ void NESpaceToBatchLayer::configure(const ITensor *input, const int block_shape_ if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) { - _has_padding = true; - _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); + _has_padding = true; + _memset_kernel = arm_compute::support::cpp14::make_unique(); + _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); } - _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output); + _space_to_batch_kernel = arm_compute::support::cpp14::make_unique(); + _space_to_batch_kernel->configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output); } Status NESpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output) @@ -81,8 +90,8 @@ void NESpaceToBatchLayer::run() // Zero out output only if we have paddings if(_has_padding) { - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); + NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY); } - NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY); + NEScheduler::get().schedule(_space_to_batch_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp b/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp index 3e1ec80687..a834600199 100644 
--- a/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp +++ b/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,9 +29,13 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NESpaceToDepthLayer::~NESpaceToDepthLayer() = default; + NESpaceToDepthLayer::NESpaceToDepthLayer() : _space_to_depth_kernel() { @@ -40,7 +44,8 @@ NESpaceToDepthLayer::NESpaceToDepthLayer() void NESpaceToDepthLayer::configure(const ITensor *input, ITensor *output, int32_t block_shape) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _space_to_depth_kernel.configure(input, output, block_shape); + _space_to_depth_kernel = arm_compute::support::cpp14::make_unique(); + _space_to_depth_kernel->configure(input, output, block_shape); } Status NESpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) @@ -51,6 +56,6 @@ Status NESpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo void NESpaceToDepthLayer::run() { - NEScheduler::get().schedule(&_space_to_depth_kernel, Window::DimY); + NEScheduler::get().schedule(_space_to_depth_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEStackLayer.cpp b/src/runtime/NEON/functions/NEStackLayer.cpp index a99a95ab2a..e38ff6bee7 100644 --- a/src/runtime/NEON/functions/NEStackLayer.cpp +++ b/src/runtime/NEON/functions/NEStackLayer.cpp @@ -30,9 +30,13 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEStackLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { 
+NEStackLayer::~NEStackLayer() = default; + NEStackLayer::NEStackLayer() // NOLINT : _input(), _stack_kernels(), @@ -50,7 +54,8 @@ void NEStackLayer::configure(const std::vector &input, int axis, ITen for(unsigned int i = 0; i < _num_inputs; i++) { - _stack_kernels[i].configure(input[i], axis_u, i, _num_inputs, output); + _stack_kernels[i] = arm_compute::support::cpp14::make_unique(); + _stack_kernels[i]->configure(input[i], axis_u, i, _num_inputs, output); } } @@ -80,7 +85,7 @@ void NEStackLayer::run() { for(unsigned i = 0; i < _num_inputs; i++) { - NEScheduler::get().schedule(&_stack_kernels[i], Window::DimY); + NEScheduler::get().schedule(_stack_kernels[i].get(), Window::DimY); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEStridedSlice.cpp b/src/runtime/NEON/functions/NEStridedSlice.cpp index 8bf81e8270..308b856ec6 100644 --- a/src/runtime/NEON/functions/NEStridedSlice.cpp +++ b/src/runtime/NEON/functions/NEStridedSlice.cpp @@ -24,8 +24,8 @@ #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NETableLookup.cpp b/src/runtime/NEON/functions/NETableLookup.cpp index b8d765f76b..9295bf0ece 100644 --- a/src/runtime/NEON/functions/NETableLookup.cpp +++ b/src/runtime/NEON/functions/NETableLookup.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NETableLookup.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEThreshold.cpp b/src/runtime/NEON/functions/NEThreshold.cpp index e21511ed65..2f1e3047b5 100644 --- a/src/runtime/NEON/functions/NEThreshold.cpp +++ 
b/src/runtime/NEON/functions/NEThreshold.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEThreshold.h" -#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" +#include "src/core/NEON/kernels/NEThresholdKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NETile.cpp b/src/runtime/NEON/functions/NETile.cpp index 6fda3a5ba6..6a1e20ddf8 100644 --- a/src/runtime/NEON/functions/NETile.cpp +++ b/src/runtime/NEON/functions/NETile.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NETile.h" -#include "arm_compute/core/NEON/kernels/NETileKernel.h" +#include "src/core/NEON/kernels/NETileKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp index 88d1672173..5af417f4ed 100644 --- a/src/runtime/NEON/functions/NETranspose.cpp +++ b/src/runtime/NEON/functions/NETranspose.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEUpsampleLayer.cpp b/src/runtime/NEON/functions/NEUpsampleLayer.cpp index 58c050f904..aae58387e2 100644 --- a/src/runtime/NEON/functions/NEUpsampleLayer.cpp +++ b/src/runtime/NEON/functions/NEUpsampleLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,10 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h" -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEUpsampleLayer::~NEUpsampleLayer() = default; + NEUpsampleLayer::NEUpsampleLayer() : _kernel(), _data_layout() { @@ -41,12 +44,13 @@ Status NEUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *ou void NEUpsampleLayer::configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy &policy) { _data_layout = input->info()->data_layout(); - _kernel.configure(input, output, info, policy); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(input, output, info, policy); } void NEUpsampleLayer::run() { const auto win = (_data_layout == DataLayout::NCHW) ? Window::DimZ : Window::DimX; - NEScheduler::get().schedule(&_kernel, win); + NEScheduler::get().schedule(_kernel.get(), win); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEWarpAffine.cpp b/src/runtime/NEON/functions/NEWarpAffine.cpp index ec2c6883ba..b5dbfe0d5c 100644 --- a/src/runtime/NEON/functions/NEWarpAffine.cpp +++ b/src/runtime/NEON/functions/NEWarpAffine.cpp @@ -24,8 +24,9 @@ #include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" #include "support/MemorySupport.h" #include @@ -58,5 +59,7 @@ void NEWarpAffine::configure(ITensor *input, ITensor *output, const std::arrayborder_size(), border_mode, constant_border_value); + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, constant_border_value); + _border_handler = 
std::move(b); } diff --git a/src/runtime/NEON/functions/NEWarpPerspective.cpp b/src/runtime/NEON/functions/NEWarpPerspective.cpp index bf361b8ab9..8d42121005 100644 --- a/src/runtime/NEON/functions/NEWarpPerspective.cpp +++ b/src/runtime/NEON/functions/NEWarpPerspective.cpp @@ -24,14 +24,15 @@ #include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); @@ -58,5 +59,8 @@ void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::ar ARM_COMPUTE_ERROR("Interpolation type not supported"); } - _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value); + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, constant_border_value); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 23b9f60c38..1cb2458e13 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -30,6 +30,10 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" 
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEYOLOLayer.cpp b/src/runtime/NEON/functions/NEYOLOLayer.cpp index 233afb727a..5cad53bffd 100644 --- a/src/runtime/NEON/functions/NEYOLOLayer.cpp +++ b/src/runtime/NEON/functions/NEYOLOLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h" -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" +#include "src/core/NEON/kernels/NEYOLOLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute -- cgit v1.2.1