author     Michalis Spyrou <michalis.spyrou@arm.com>  2020-10-21 00:04:14 +0100
committer  Michalis Spyrou <michalis.spyrou@arm.com>  2020-11-03 15:10:47 +0000
commit     ebcebf1dee7f8314976b1e0cabd62b4cf893d765 (patch)
tree       95d3e691a0e88a3e213a1d30446a9224497f2055 /src/runtime/NEON/functions
parent     da4b1b2055d96aaf73704eb9b0b82d74dc2d699c (diff)
download   ComputeLibrary-ebcebf1dee7f8314976b1e0cabd62b4cf893d765.tar.gz
COMPMID-3638: Move NEON kernels
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions')
-rw-r--r-- src/runtime/NEON/functions/NEAbsoluteDifference.cpp | 7
-rw-r--r-- src/runtime/NEON/functions/NEAccumulate.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NEActivationLayer.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NEArgMinMaxLayer.cpp | 3
-rw-r--r-- src/runtime/NEON/functions/NEArithmeticAddition.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NEArithmeticSubtraction.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEBitwiseAnd.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEBitwiseNot.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEBitwiseOr.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEBitwiseXor.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEBoundingBoxTransform.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEBox3x3.cpp | 12
-rw-r--r-- src/runtime/NEON/functions/NECannyEdge.cpp | 31
-rw-r--r-- src/runtime/NEON/functions/NECast.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEChannelCombine.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEChannelExtract.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEChannelShuffleLayer.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NECol2Im.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEColorConvert.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEComputeAllAnchors.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEConcatenateLayer.cpp | 8
-rw-r--r-- src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NEConvolution.cpp | 48
-rw-r--r-- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 21
-rw-r--r-- src/runtime/NEON/functions/NECopy.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NECropResize.cpp | 3
-rw-r--r-- src/runtime/NEON/functions/NEDeconvolutionLayer.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEDepthConvertLayer.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 14
-rw-r--r-- src/runtime/NEON/functions/NEDequantizationLayer.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEDerivative.cpp | 22
-rw-r--r-- src/runtime/NEON/functions/NEDilate.cpp | 8
-rw-r--r-- src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp | 23
-rw-r--r-- src/runtime/NEON/functions/NEElementwiseOperators.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 28
-rw-r--r-- src/runtime/NEON/functions/NEErode.cpp | 13
-rw-r--r-- src/runtime/NEON/functions/NEFFT1D.cpp | 21
-rw-r--r-- src/runtime/NEON/functions/NEFFT2D.cpp | 5
-rw-r--r-- src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp | 7
-rw-r--r-- src/runtime/NEON/functions/NEFastCorners.cpp | 35
-rw-r--r-- src/runtime/NEON/functions/NEFill.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEFillBorder.cpp | 9
-rw-r--r-- src/runtime/NEON/functions/NEFlattenLayer.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEFloor.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 21
-rw-r--r-- src/runtime/NEON/functions/NEFuseBatchNormalization.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NEGEMM.cpp | 32
-rw-r--r-- src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 33
-rw-r--r-- src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 75
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp | 16
-rw-r--r-- src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEGather.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEGaussian3x3.cpp | 13
-rw-r--r-- src/runtime/NEON/functions/NEGaussian5x5.cpp | 27
-rw-r--r-- src/runtime/NEON/functions/NEGaussianPyramid.cpp | 32
-rw-r--r-- src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp | 63
-rw-r--r-- src/runtime/NEON/functions/NEHOGDescriptor.cpp | 21
-rw-r--r-- src/runtime/NEON/functions/NEHOGDetector.cpp | 7
-rw-r--r-- src/runtime/NEON/functions/NEHOGGradient.cpp | 9
-rw-r--r-- src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 8
-rw-r--r-- src/runtime/NEON/functions/NEHarrisCorners.cpp | 22
-rw-r--r-- src/runtime/NEON/functions/NEHistogram.cpp | 12
-rw-r--r-- src/runtime/NEON/functions/NEIm2Col.cpp | 9
-rw-r--r-- src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp | 12
-rw-r--r-- src/runtime/NEON/functions/NEIntegralImage.cpp | 13
-rw-r--r-- src/runtime/NEON/functions/NEL2NormalizeLayer.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NELSTMLayer.cpp | 22
-rw-r--r-- src/runtime/NEON/functions/NELSTMLayerQuantized.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NELaplacianPyramid.cpp | 9
-rw-r--r-- src/runtime/NEON/functions/NELaplacianReconstruct.cpp | 6
-rw-r--r-- src/runtime/NEON/functions/NELocallyConnectedLayer.cpp | 36
-rw-r--r-- src/runtime/NEON/functions/NEMagnitude.cpp | 7
-rw-r--r-- src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp | 15
-rw-r--r-- src/runtime/NEON/functions/NEMeanStdDev.cpp | 21
-rw-r--r-- src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NEMedian3x3.cpp | 13
-rw-r--r-- src/runtime/NEON/functions/NEMinMaxLocation.cpp | 22
-rw-r--r-- src/runtime/NEON/functions/NENonLinearFilter.cpp | 13
-rw-r--r-- src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp | 14
-rw-r--r-- src/runtime/NEON/functions/NENormalizationLayer.cpp | 9
-rw-r--r-- src/runtime/NEON/functions/NEOpticalFlow.cpp | 22
-rw-r--r-- src/runtime/NEON/functions/NEPReluLayer.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEPadLayer.cpp | 15
-rw-r--r-- src/runtime/NEON/functions/NEPermute.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEPhase.cpp | 7
-rw-r--r-- src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEPoolingLayer.cpp | 21
-rw-r--r-- src/runtime/NEON/functions/NEPriorBoxLayer.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEQLSTMLayer.cpp | 108
-rw-r--r-- src/runtime/NEON/functions/NEQuantizationLayer.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NERNNLayer.cpp | 20
-rw-r--r-- src/runtime/NEON/functions/NEROIAlignLayer.cpp | 3
-rw-r--r-- src/runtime/NEON/functions/NEROIPoolingLayer.cpp | 12
-rw-r--r-- src/runtime/NEON/functions/NERange.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NEReduceMean.cpp | 3
-rw-r--r-- src/runtime/NEON/functions/NEReductionOperation.cpp | 9
-rw-r--r-- src/runtime/NEON/functions/NERemap.cpp | 15
-rw-r--r-- src/runtime/NEON/functions/NEReorgLayer.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEReshapeLayer.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NEReverse.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEScale.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEScharr3x3.cpp | 8
-rw-r--r-- src/runtime/NEON/functions/NESelect.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NESlice.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NESobel3x3.cpp | 13
-rw-r--r-- src/runtime/NEON/functions/NESobel5x5.cpp | 34
-rw-r--r-- src/runtime/NEON/functions/NESobel7x7.cpp | 33
-rw-r--r-- src/runtime/NEON/functions/NESoftmaxLayer.cpp | 30
-rw-r--r-- src/runtime/NEON/functions/NESpaceToBatchLayer.cpp | 27
-rw-r--r-- src/runtime/NEON/functions/NESpaceToDepthLayer.cpp | 11
-rw-r--r-- src/runtime/NEON/functions/NEStackLayer.cpp | 9
-rw-r--r-- src/runtime/NEON/functions/NEStridedSlice.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NETableLookup.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEThreshold.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NETile.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NETranspose.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEUpsampleLayer.cpp | 12
-rw-r--r-- src/runtime/NEON/functions/NEWarpAffine.cpp | 7
-rw-r--r-- src/runtime/NEON/functions/NEWarpPerspective.cpp | 12
-rw-r--r-- src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NEYOLOLayer.cpp | 2
127 files changed, 1090 insertions, 458 deletions
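
The same mechanical change is applied across the files above: kernel headers move from the public arm_compute/core/NEON/kernels/ tree to the internal src/core/NEON/kernels/ tree, and runtime functions stop holding kernels by value. Instead they own them through std::unique_ptr, construct them with support::cpp14::make_unique in configure(), schedule them through the raw pointer in run(), and default their destructor in the .cpp file, where the kernel type is complete. A minimal sketch of the resulting shape, using a hypothetical NEFoo / NEFooKernel pair to stand in for any of the moved kernels and their runtime wrappers:

#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEFoo.h" // hypothetical public function header
#include "src/core/NEON/kernels/NEFooKernel.h"        // hypothetical internal kernel header
#include "support/MemorySupport.h"

namespace arm_compute
{
// Destructor defaulted here rather than in the header: the public header only
// forward-declares NEFooKernel, so ~unique_ptr<NEFooKernel> must be instantiated
// where the kernel type is complete.
NEFoo::~NEFoo() = default;

void NEFoo::configure(const ITensor *input, ITensor *output)
{
    // _kernel is assumed to be declared as std::unique_ptr<NEFooKernel> in the public header
    _kernel = arm_compute::support::cpp14::make_unique<NEFooKernel>();
    _kernel->configure(input, output);
}

void NEFoo::run()
{
    // Kernels are now scheduled through the owning pointer instead of the address of a member
    NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
} // namespace arm_compute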
diff --git a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp
index ec27820126..df2bc7d72e 100644
--- a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp
+++ b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
-#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
+#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEAbsoluteDifference::~NEAbsoluteDifference() = default;
void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
{
@@ -36,3 +38,4 @@ void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input
k->configure(input1, input2, output);
_kernel = std::move(k);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEAccumulate.cpp b/src/runtime/NEON/functions/NEAccumulate.cpp
index 662f8ccb5b..20eefd9d2d 100644
--- a/src/runtime/NEON/functions/NEAccumulate.cpp
+++ b/src/runtime/NEON/functions/NEAccumulate.cpp
@@ -23,12 +23,14 @@
*/
#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
-#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h"
+#include "src/core/NEON/kernels/NEAccumulateKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEAccumulate::~NEAccumulate() = default;
void NEAccumulate::configure(const ITensor *input, ITensor *output)
{
@@ -37,6 +39,8 @@ void NEAccumulate::configure(const ITensor *input, ITensor *output)
_kernel = std::move(k);
}
+NEAccumulateWeighted::~NEAccumulateWeighted() = default;
+
void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16)
{
if(use_fp16)
@@ -53,9 +57,12 @@ void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor
}
}
+NEAccumulateSquared::~NEAccumulateSquared() = default;
+
void NEAccumulateSquared::configure(const ITensor *input, uint32_t shift, ITensor *output)
{
auto k = arm_compute::support::cpp14::make_unique<NEAccumulateSquaredKernel>();
k->configure(input, shift, output);
_kernel = std::move(k);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp
index 7f55edbf70..f9ad298e4d 100644
--- a/src/runtime/NEON/functions/NEActivationLayer.cpp
+++ b/src/runtime/NEON/functions/NEActivationLayer.cpp
@@ -24,16 +24,18 @@
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
namespace experimental
{
+NEActivationLayer::~NEActivationLayer() = default;
+
void NEActivationLayer::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernel>();
diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
index 70bbba62ad..2a9bb76c7f 100644
--- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
+++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
@@ -29,11 +29,14 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
+NEArgMinMaxLayer::~NEArgMinMaxLayer() = default;
+
NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _reduction_function(support::cpp14::make_unique<NEReductionOperation>())
{
diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp
index 4453a015e8..0bf9a09333 100644
--- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp
+++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -33,6 +33,8 @@ namespace arm_compute
{
namespace experimental
{
+NEArithmeticAddition::~NEArithmeticAddition() = default;
+
void NEArithmeticAddition::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
index 1c95bbfae8..ba3f426269 100644
--- a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
+++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
index eab40ac5be..d0fdfcf101 100644
--- a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
@@ -29,10 +29,13 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEBatchNormalizationLayer::~NEBatchNormalizationLayer() = default;
NEBatchNormalizationLayer::NEBatchNormalizationLayer()
: _norm_kernel()
@@ -43,7 +46,8 @@ void NEBatchNormalizationLayer::configure(ITensor *input, ITensor *output, const
ActivationLayerInfo act_info)
{
// Configure kernel
- _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon, act_info);
+ _norm_kernel = arm_compute::support::cpp14::make_unique<NEBatchNormalizationLayerKernel>();
+ _norm_kernel->configure(input, output, mean, var, beta, gamma, epsilon, act_info);
}
Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma,
@@ -55,5 +59,6 @@ Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITens
void NEBatchNormalizationLayer::run()
{
- NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
+ NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp
index 2705cffe68..77a63c0f63 100644
--- a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp
+++ b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/NEON/functions/NEBitwiseAnd.cpp b/src/runtime/NEON/functions/NEBitwiseAnd.cpp
index 1d89308565..f3b5220ccf 100644
--- a/src/runtime/NEON/functions/NEBitwiseAnd.cpp
+++ b/src/runtime/NEON/functions/NEBitwiseAnd.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseAndKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEBitwiseNot.cpp b/src/runtime/NEON/functions/NEBitwiseNot.cpp
index 585b059005..036584ea1a 100644
--- a/src/runtime/NEON/functions/NEBitwiseNot.cpp
+++ b/src/runtime/NEON/functions/NEBitwiseNot.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseNotKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEBitwiseOr.cpp b/src/runtime/NEON/functions/NEBitwiseOr.cpp
index bba866d97a..fc905a0919 100644
--- a/src/runtime/NEON/functions/NEBitwiseOr.cpp
+++ b/src/runtime/NEON/functions/NEBitwiseOr.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseOrKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEBitwiseXor.cpp b/src/runtime/NEON/functions/NEBitwiseXor.cpp
index 188fe3d9ef..301a0c4659 100644
--- a/src/runtime/NEON/functions/NEBitwiseXor.cpp
+++ b/src/runtime/NEON/functions/NEBitwiseXor.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseXorKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp b/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp
index b1ecfaf314..0b639430b1 100644
--- a/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp
+++ b/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
+#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/NEON/functions/NEBox3x3.cpp b/src/runtime/NEON/functions/NEBox3x3.cpp
index a380377daa..01d2356a4c 100644
--- a/src/runtime/NEON/functions/NEBox3x3.cpp
+++ b/src/runtime/NEON/functions/NEBox3x3.cpp
@@ -23,14 +23,15 @@
*/
#include "arm_compute/runtime/NEON/functions/NEBox3x3.h"
-#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEBox3x3Kernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
{
if(use_fp16)
@@ -45,5 +46,8 @@ void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
}
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
index d7ec52c5ac..bf4f7d7933 100644
--- a/src/runtime/NEON/functions/NECannyEdge.cpp
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp
@@ -25,8 +25,6 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -34,13 +32,19 @@
#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NECannyEdgeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
#include "support/MemorySupport.h"
#include <cstring>
#include <inttypes.h>
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NECannyEdge::~NECannyEdge() = default;
NECannyEdge::NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
@@ -139,21 +143,25 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
_memory_group.manage(&_nonmax);
// Configure non-maxima suppression
- _non_max_suppr.configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED);
+ _non_max_suppr = arm_compute::support::cpp14::make_unique<NEEdgeNonMaxSuppressionKernel>();
+ _non_max_suppr->configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED);
// Fill border around magnitude image as non-maxima suppression will access
// it. If border mode is undefined filling the border is a nop.
- _border_mag_gradient.configure(&_magnitude, _non_max_suppr.border_size(), border_mode, constant_border_value);
+ _border_mag_gradient = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_mag_gradient->configure(&_magnitude, _non_max_suppr->border_size(), border_mode, constant_border_value);
// Allocate intermediate tensors
_phase.allocator()->allocate();
_magnitude.allocator()->allocate();
// Configure edge tracing
- _edge_trace.configure(&_nonmax, output);
+ _edge_trace = arm_compute::support::cpp14::make_unique<NEEdgeTraceKernel>();
+ _edge_trace->configure(&_nonmax, output);
// Fill border with "No edge" to stop recursion in edge trace
- _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
+ _border_edge_trace = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_edge_trace->configure(&_nonmax, _edge_trace->border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
// Allocate intermediate tensors
_nonmax.allocator()->allocate();
@@ -172,17 +180,18 @@ void NECannyEdge::run()
NEScheduler::get().schedule(_gradient.get(), Window::DimY);
// Fill border before non-maxima suppression. Nop for border mode undefined.
- NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ);
+ NEScheduler::get().schedule(_border_mag_gradient.get(), Window::DimZ);
// Run non-maxima suppression
- NEScheduler::get().schedule(&_non_max_suppr, Window::DimY);
+ NEScheduler::get().schedule(_non_max_suppr.get(), Window::DimY);
ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
// Fill border before edge trace
- NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ);
+ NEScheduler::get().schedule(_border_edge_trace.get(), Window::DimZ);
// Run edge tracing
- NEScheduler::get().schedule(&_edge_trace, Window::DimY);
+ NEScheduler::get().schedule(_edge_trace.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NECast.cpp b/src/runtime/NEON/functions/NECast.cpp
index 4b35110417..7fd2605fd2 100644
--- a/src/runtime/NEON/functions/NECast.cpp
+++ b/src/runtime/NEON/functions/NECast.cpp
@@ -24,8 +24,8 @@
#include "arm_compute/runtime/NEON/functions/NECast.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
+#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEChannelCombine.cpp b/src/runtime/NEON/functions/NEChannelCombine.cpp
index e987951097..f8a9be0313 100644
--- a/src/runtime/NEON/functions/NEChannelCombine.cpp
+++ b/src/runtime/NEON/functions/NEChannelCombine.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h"
-#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h"
+#include "src/core/NEON/kernels/NEChannelCombineKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEChannelExtract.cpp b/src/runtime/NEON/functions/NEChannelExtract.cpp
index d78a8f8301..8f5e4d47d9 100644
--- a/src/runtime/NEON/functions/NEChannelExtract.cpp
+++ b/src/runtime/NEON/functions/NEChannelExtract.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h"
-#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
+#include "src/core/NEON/kernels/NEChannelExtractKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp b/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp
index 0392a92663..c72dec67ee 100644
--- a/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp
+++ b/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
-#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NECol2Im.cpp b/src/runtime/NEON/functions/NECol2Im.cpp
index e4fe36fd25..0706125157 100644
--- a/src/runtime/NEON/functions/NECol2Im.cpp
+++ b/src/runtime/NEON/functions/NECol2Im.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NECol2Im.h"
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEColorConvert.cpp b/src/runtime/NEON/functions/NEColorConvert.cpp
index 7befac7aa3..ebdd1046ce 100644
--- a/src/runtime/NEON/functions/NEColorConvert.cpp
+++ b/src/runtime/NEON/functions/NEColorConvert.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEColorConvert.h"
-#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
+#include "src/core/NEON/kernels/NEColorConvertKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEComputeAllAnchors.cpp b/src/runtime/NEON/functions/NEComputeAllAnchors.cpp
index cb89117ff9..3f5712dd3a 100644
--- a/src/runtime/NEON/functions/NEComputeAllAnchors.cpp
+++ b/src/runtime/NEON/functions/NEComputeAllAnchors.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h"
+#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
index 72bd9e6b19..03a01aec6b 100644
--- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp
+++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
@@ -23,10 +23,10 @@
*/
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
-#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
diff --git a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp
index f697efb367..291afe0273 100644
--- a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp
+++ b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,9 +22,13 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEConvertFullyConnectedWeights::~NEConvertFullyConnectedWeights() = default;
+
NEConvertFullyConnectedWeights::NEConvertFullyConnectedWeights()
: _kernel()
{
@@ -33,7 +37,8 @@ NEConvertFullyConnectedWeights::NEConvertFullyConnectedWeights()
void NEConvertFullyConnectedWeights::configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape,
DataLayout data_layout)
{
- _kernel.configure(input, output, original_input_shape, data_layout);
+ _kernel = arm_compute::support::cpp14::make_unique<NEConvertFullyConnectedWeightsKernel>();
+ _kernel->configure(input, output, original_input_shape, data_layout);
}
Status NEConvertFullyConnectedWeights::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape,
@@ -44,6 +49,6 @@ Status NEConvertFullyConnectedWeights::validate(const ITensorInfo *input, const
void NEConvertFullyConnectedWeights::run()
{
- NEScheduler::get().schedule(&_kernel, Window::DimZ);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimZ);
}
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp
index 8200a08ca8..07ac8bd42b 100644
--- a/src/runtime/NEON/functions/NEConvolution.cpp
+++ b/src/runtime/NEON/functions/NEConvolution.cpp
@@ -25,29 +25,39 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <array>
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEConvolution3x3::~NEConvolution3x3() = default;
void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEConvolution3x3Kernel>();
k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
template <unsigned int matrix_size>
+NEConvolutionSquare<matrix_size>::~NEConvolutionSquare() = default;
+
+template <unsigned int matrix_size>
NEConvolutionSquare<matrix_size>::NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
{
@@ -66,6 +76,7 @@ void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output
_is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
if(_is_separable)
{
DataType intermediate_type = DataType::UNKNOWN;
@@ -82,35 +93,40 @@ void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output
scale = calculate_matrix_scale(conv, matrix_size);
}
- _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
+ _kernel_hor = arm_compute::support::cpp14::make_unique<NESeparableConvolutionHorKernel<matrix_size>>();
+ _kernel_vert = arm_compute::support::cpp14::make_unique<NESeparableConvolutionVertKernel<matrix_size>>();
+
+ _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
_tmp.allocator()->allocate();
- _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
else
{
- _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel = arm_compute::support::cpp14::make_unique<NEConvolutionKernel<matrix_size>>();
+ _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
+ _border_handler = std::move(b);
}
template <unsigned int matrix_size>
void NEConvolutionSquare<matrix_size>::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
if(_is_separable)
{
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
- NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+ NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY);
}
else
{
- NEScheduler::get().schedule(&_kernel, Window::DimY);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
}
@@ -118,10 +134,16 @@ template class arm_compute::NEConvolutionSquare<5>;
template class arm_compute::NEConvolutionSquare<7>;
template class arm_compute::NEConvolutionSquare<9>;
+NEConvolutionRectangle::~NEConvolutionRectangle() = default;
+
void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEConvolutionRectangleKernel>();
k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 491425c487..901b1e880e 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -27,6 +27,27 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "support/MemorySupport.h"
#include <cmath>
diff --git a/src/runtime/NEON/functions/NECopy.cpp b/src/runtime/NEON/functions/NECopy.cpp
index a461c18894..9e7bf40559 100644
--- a/src/runtime/NEON/functions/NECopy.cpp
+++ b/src/runtime/NEON/functions/NECopy.cpp
@@ -23,13 +23,15 @@
*/
#include "arm_compute/runtime/NEON/functions/NECopy.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
#include "support/MemorySupport.h"
#include <utility>
namespace arm_compute
{
+NECopy::~NECopy() = default;
+
void NECopy::configure(ITensor *input, ITensor *output)
{
auto k = arm_compute::support::cpp14::make_unique<NECopyKernel>();
diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp
index f8f99169aa..2e2d2251b6 100644
--- a/src/runtime/NEON/functions/NECropResize.cpp
+++ b/src/runtime/NEON/functions/NECropResize.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NECropResize.h"
+#include "src/core/NEON/kernels/NECropKernel.h"
#include "support/MemorySupport.h"
@@ -31,6 +32,8 @@
namespace arm_compute
{
+NECropResize::~NECropResize() = default;
+
NECropResize::NECropResize()
: _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _crop(), _scale(), _crop_results(), _scaled_results()
{
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index cb9ab168a7..2b5b0082c4 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute::misc::shape_calculator;
diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
index 1ffcca0d7f..af0f5efb69 100644
--- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp
index 0aaa37ec92..c4f15e3b68 100644
--- a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 6c22523bcb..fc97279211 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -27,6 +27,8 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;
@@ -69,10 +71,11 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo
}
} // namespace
+NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
+
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(),
- _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false),
- _is_activationlayer_enabled(false), _is_prepared(false)
+ : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(),
+ _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}
@@ -243,7 +246,8 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(
}
_original_weights = weights_to_use;
- _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
+ _depthwise_conv_kernel = arm_compute::support::cpp14::make_unique<NEDepthwiseConvolutionLayerNativeKernel>();
+ _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
if(_is_nchw)
{
@@ -309,7 +313,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
_permute_input.run();
}
- NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY);
+ NEScheduler::get().schedule(_depthwise_conv_kernel.get(), Window::DimY);
if(_is_nchw)
{
diff --git a/src/runtime/NEON/functions/NEDequantizationLayer.cpp b/src/runtime/NEON/functions/NEDequantizationLayer.cpp
index a4a3a43b2e..0c0f86c82b 100644
--- a/src/runtime/NEON/functions/NEDequantizationLayer.cpp
+++ b/src/runtime/NEON/functions/NEDequantizationLayer.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
-#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp
index 24991400b8..f007e9fda3 100644
--- a/src/runtime/NEON/functions/NEDerivative.cpp
+++ b/src/runtime/NEON/functions/NEDerivative.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,12 +24,16 @@
#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEDerivative::~NEDerivative() = default;
NEDerivative::NEDerivative()
: _kernel(), _border_handler()
@@ -41,12 +45,16 @@ void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
- _kernel.configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _kernel = arm_compute::support::cpp14::make_unique<NEDerivativeKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
+ _kernel->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _border_handler->configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
void NEDerivative::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
- NEScheduler::get().schedule(&_kernel, Window::DimY);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEDilate.cpp b/src/runtime/NEON/functions/NEDilate.cpp
index 7f503865b4..70c0b61639 100644
--- a/src/runtime/NEON/functions/NEDilate.cpp
+++ b/src/runtime/NEON/functions/NEDilate.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/NEON/functions/NEDilate.h"
-#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEDilateKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -36,5 +37,8 @@ void NEDilate::configure(ITensor *input, ITensor *output, BorderMode border_mode
auto k = arm_compute::support::cpp14::make_unique<NEDilateKernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index fe545905d5..98d6386ffe 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -27,9 +27,15 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEDirectConvolutionLayer::~NEDirectConvolutionLayer() = default;
+
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false),
_is_activationlayer_enabled(false), _dim_split(Window::DimZ), _is_padding_required()
@@ -39,6 +45,9 @@ NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManage
void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
+ _output_stage_kernel = arm_compute::support::cpp14::make_unique<NEDirectConvolutionLayerOutputStageKernel>();
+ _conv_kernel = arm_compute::support::cpp14::make_unique<NEDirectConvolutionLayerKernel>();
+ _input_border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
// Free accumulator
if(_accumulator.buffer() != nullptr)
@@ -51,17 +60,17 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
// Check if bias should be added in the convolution result
_has_bias = (bias != nullptr);
- _conv_kernel.configure(input, weights, output, conv_info);
+ _conv_kernel->configure(input, weights, output, conv_info);
if(_has_bias)
{
- _output_stage_kernel.configure(output, bias);
+ _output_stage_kernel->configure(output, bias);
}
- _is_padding_required = !_conv_kernel.border_size().empty();
+ _is_padding_required = !_conv_kernel->border_size().empty();
if(_is_padding_required)
{
// Add zero padding XY
- _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+ _input_border_handler->configure(input, _conv_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
}
//Configure Activation Layer
@@ -109,12 +118,12 @@ void NEDirectConvolutionLayer::run()
if(_is_padding_required)
{
- NEScheduler::get().schedule(&_input_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_input_border_handler.get(), Window::DimZ);
}
- NEScheduler::get().schedule(&_conv_kernel, _dim_split);
+ NEScheduler::get().schedule(_conv_kernel.get(), _dim_split);
if(_has_bias)
{
- NEScheduler::get().schedule(&_output_stage_kernel, Window::DimY);
+ NEScheduler::get().schedule(_output_stage_kernel.get(), Window::DimY);
}
if(_is_activationlayer_enabled)
diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp
index d1f60c71e1..7f3fe8b30b 100644
--- a/src/runtime/NEON/functions/NEElementwiseOperators.cpp
+++ b/src/runtime/NEON/functions/NEElementwiseOperators.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
-#include <arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h>
+#include <src/core/NEON/kernels/NEElementwiseOperationKernel.h>
#include "arm_compute/core/ITensor.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp
index cb4e3a0b7d..5e130205d2 100644
--- a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp
+++ b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
index b3d5ad484f..d3ff171323 100644
--- a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
+++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,8 +28,15 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
+#include "src/core/NEON/kernels/NETableLookupKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEEqualizeHistogram::~NEEqualizeHistogram() = default;
NEEqualizeHistogram::NEEqualizeHistogram()
: _histogram_kernel(), _cd_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8)
@@ -43,20 +50,25 @@ void NEEqualizeHistogram::configure(const IImage *input, IImage *output)
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ _histogram_kernel = arm_compute::support::cpp14::make_unique<NEHistogramKernel>();
+ _cd_histogram_kernel = arm_compute::support::cpp14::make_unique<NECumulativeDistributionKernel>();
+ _map_histogram_kernel = arm_compute::support::cpp14::make_unique<NETableLookupKernel>();
+
// Configure kernels
- _histogram_kernel.configure(input, &_hist);
- _cd_histogram_kernel.configure(input, &_hist, &_cum_dist, &_cd_lut);
- _map_histogram_kernel.configure(input, &_cd_lut, output);
+ _histogram_kernel->configure(input, &_hist);
+ _cd_histogram_kernel->configure(input, &_hist, &_cum_dist, &_cd_lut);
+ _map_histogram_kernel->configure(input, &_cd_lut, output);
}
void NEEqualizeHistogram::run()
{
// Calculate histogram of input.
- NEScheduler::get().schedule(&_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY);
// Calculate cumulative distribution of histogram and create LUT.
- NEScheduler::get().schedule(&_cd_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_cd_histogram_kernel.get(), Window::DimY);
// Map input to output using created LUT.
- NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_map_histogram_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEErode.cpp b/src/runtime/NEON/functions/NEErode.cpp
index a89993c1fe..748694fe3f 100644
--- a/src/runtime/NEON/functions/NEErode.cpp
+++ b/src/runtime/NEON/functions/NEErode.cpp
@@ -23,18 +23,23 @@
*/
#include "arm_compute/runtime/NEON/functions/NEErode.h"
-#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEErodeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEErode::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEErodeKernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEFFT1D.cpp b/src/runtime/NEON/functions/NEFFT1D.cpp
index 2c53b185df..b94c25832a 100644
--- a/src/runtime/NEON/functions/NEFFT1D.cpp
+++ b/src/runtime/NEON/functions/NEFFT1D.cpp
@@ -26,10 +26,16 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
#include "src/core/utils/helpers/fft.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEFFT1D::~NEFFT1D() = default;
+
NEFFT1D::NEFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _axis(0), _run_scale(false)
{
@@ -58,7 +64,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo &
TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32);
_digit_reverse_indices.allocator()->init(digit_reverse_indices_info);
_memory_group.manage(&_digit_reversed_input);
- _digit_reverse_kernel.configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
+ _digit_reverse_kernel = arm_compute::support::cpp14::make_unique<NEFFTDigitReverseKernel>();
+ _digit_reverse_kernel->configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
// Create and configure FFT kernels
unsigned int Nx = 1;
@@ -75,7 +82,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo &
fft_kernel_info.radix = radix_for_stage;
fft_kernel_info.Nx = Nx;
fft_kernel_info.is_first_stage = (i == 0);
- _fft_kernels[i].configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
+ _fft_kernels[i] = arm_compute::support::cpp14::make_unique<NEFFTRadixStageKernel>();
+ _fft_kernels[i]->configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
Nx *= radix_for_stage;
}
@@ -86,7 +94,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo &
FFTScaleKernelInfo scale_config;
scale_config.scale = static_cast<float>(N);
scale_config.conjugate = config.direction == FFTDirection::Inverse;
- is_c2r ? _scale_kernel.configure(&_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config);
+ _scale_kernel = arm_compute::support::cpp14::make_unique<NEFFTScaleKernel>();
+ is_c2r ? _scale_kernel->configure(&_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config);
}
// Allocate tensors
@@ -128,17 +137,17 @@ void NEFFT1D::run()
{
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_digit_reverse_kernel, (_axis == 0 ? Window::DimY : Window::DimZ));
+ NEScheduler::get().schedule(_digit_reverse_kernel.get(), (_axis == 0 ? Window::DimY : Window::DimZ));
for(unsigned int i = 0; i < _num_ffts; ++i)
{
- NEScheduler::get().schedule(&_fft_kernels[i], (_axis == 0 ? Window::DimY : Window::DimX));
+ NEScheduler::get().schedule(_fft_kernels[i].get(), (_axis == 0 ? Window::DimY : Window::DimX));
}
// Run output scaling
if(_run_scale)
{
- NEScheduler::get().schedule(&_scale_kernel, Window::DimY);
+ NEScheduler::get().schedule(_scale_kernel.get(), Window::DimY);
}
}
} // namespace arm_compute
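The explicitly defaulted destructors added here and in the following files (NEFFT1D::~NEFFT1D() = default; and similar) live in the .cpp on purpose: the members are now std::unique_ptr to kernel types that the public header only forward-declares, and unique_ptr needs the complete type wherever its deleter is instantiated. A small illustration with hypothetical Bar/BarKernel names:

    // bar.h -- public header: the kernel type is only forward-declared
    #include <memory>
    class BarKernel;
    class Bar
    {
    public:
        Bar();
        ~Bar();                        // declared here, defined in bar.cpp
    private:
        std::unique_ptr<BarKernel> _kernel;
    };

    // bar.cpp -- BarKernel is complete here, so the defaulted destructor can
    // generate unique_ptr's deleter without exposing the kernel header to users
    #include "bar.h"
    #include "bar_kernel.h"            // hypothetical header with the full BarKernel definition
    Bar::Bar()  = default;
    Bar::~Bar() = default;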
diff --git a/src/runtime/NEON/functions/NEFFT2D.cpp b/src/runtime/NEON/functions/NEFFT2D.cpp
index b63afe59c0..3b787cd523 100644
--- a/src/runtime/NEON/functions/NEFFT2D.cpp
+++ b/src/runtime/NEON/functions/NEFFT2D.cpp
@@ -26,9 +26,14 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Scheduler.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
namespace arm_compute
{
+NEFFT2D::~NEFFT2D() = default;
+
NEFFT2D::NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _first_pass_func(memory_manager), _second_pass_func(memory_manager), _first_pass_tensor()
{
diff --git a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp
index a46fc9f45f..23788b7c39 100644
--- a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp
@@ -27,6 +27,12 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/fft.h"
@@ -96,6 +102,7 @@ NEFFTConvolutionLayer::NEFFTConvolutionLayer(std::shared_ptr<IMemoryManager> mem
_is_prepared(false)
{
}
+NEFFTConvolutionLayer::~NEFFTConvolutionLayer() = default;
void NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info)
diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp
index 303c593f84..1bde3cc508 100644
--- a/src/runtime/NEON/functions/NEFastCorners.cpp
+++ b/src/runtime/NEON/functions/NEFastCorners.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,15 +25,21 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFastCornersKernel.h"
+#include "src/core/NEON/kernels/NEFillArrayKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEFastCorners::~NEFastCorners() = default;
NEFastCorners::NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
@@ -62,24 +68,28 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre
_output.allocator()->init(tensor_info);
_memory_group.manage(&_output);
+ _fast_corners_kernel = arm_compute::support::cpp14::make_unique<NEFastCornersKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _fill_kernel = arm_compute::support::cpp14::make_unique<NEFillArrayKernel>();
// If border is UNDEFINED _fast_corners_kernel will operate in xwindow (3,
// width - 3) and ywindow (3, height -3) so the output image will leave the
// pixels on the borders unchanged. This is reflected in the valid region
// of the output. The non maxima suppression is only run on the valid
// pixels.
- _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode);
- _border_handler.configure(input, _fast_corners_kernel.border_size(), border_mode, constant_border_value);
+ _fast_corners_kernel->configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode);
+ _border_handler->configure(input, _fast_corners_kernel->border_size(), border_mode, constant_border_value);
if(!_non_max)
{
- _fill_kernel.configure(&_output, 1 /* we keep all texels >0 */, corners);
+ _fill_kernel->configure(&_output, 1 /* we keep all texels >0 */, corners);
}
else
{
_suppressed.allocator()->init(tensor_info);
_memory_group.manage(&_suppressed);
- _nonmax_kernel.configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode);
- _fill_kernel.configure(&_suppressed, 1 /* we keep all texels >0 */, corners);
+ _nonmax_kernel = arm_compute::support::cpp14::make_unique<NENonMaximaSuppression3x3Kernel>();
+ _nonmax_kernel->configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode);
+ _fill_kernel->configure(&_suppressed, 1 /* we keep all texels >0 */, corners);
// Allocate intermediate tensors
_suppressed.allocator()->allocate();
@@ -91,16 +101,17 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre
void NEFastCorners::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fast_corners_kernel.get(), Window::DimY);
if(_non_max)
{
- NEScheduler::get().schedule(&_nonmax_kernel, Window::DimY);
+ NEScheduler::get().schedule(_nonmax_kernel.get(), Window::DimY);
}
- NEScheduler::get().schedule(&_fill_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fill_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEFill.cpp b/src/runtime/NEON/functions/NEFill.cpp
index 79fe175e69..68292c9ee0 100644
--- a/src/runtime/NEON/functions/NEFill.cpp
+++ b/src/runtime/NEON/functions/NEFill.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEFillBorder.cpp b/src/runtime/NEON/functions/NEFillBorder.cpp
index de2ef26b80..e96069f97c 100644
--- a/src/runtime/NEON/functions/NEFillBorder.cpp
+++ b/src/runtime/NEON/functions/NEFillBorder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,16 +25,19 @@
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
{
- _border_handler.configure(input, BorderSize(border_width), border_mode, constant_border_value);
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_handler->configure(input, BorderSize(border_width), border_mode, constant_border_value);
}
void NEFillBorder::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
}
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp
index 936a70dacc..4dfe96325e 100644
--- a/src/runtime/NEON/functions/NEFlattenLayer.cpp
+++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "arm_compute/core/Size2D.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEFloor.cpp b/src/runtime/NEON/functions/NEFloor.cpp
index 95b2497ded..5f6bd61017 100644
--- a/src/runtime/NEON/functions/NEFloor.cpp
+++ b/src/runtime/NEON/functions/NEFloor.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEFloor.h"
-#include "arm_compute/core/NEON/kernels/NEFloorKernel.h"
+#include "src/core/NEON/kernels/NEFloorKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index d956d16f4d..714fa58a66 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -29,6 +29,19 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "support/MemorySupport.h"
@@ -145,6 +158,8 @@ Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c
return NETransposeKernel::validate(input, output);
}
+NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;
+
NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
_reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
@@ -199,7 +214,9 @@ void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITenso
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
- _flatten_kernel.configure(input, &_flatten_output);
+
+ _flatten_kernel = arm_compute::support::cpp14::make_unique<NEFlattenLayerKernel>();
+ _flatten_kernel->configure(input, &_flatten_output);
// Configure matrix multiply kernel
configure_mm(&_flatten_output, weights, biases, output, act);
@@ -398,7 +415,7 @@ void NEFullyConnectedLayer::run()
// Linearize input if it comes from a convolutional layer
if(_is_fc_after_conv)
{
- NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
+ NEScheduler::get().schedule(_flatten_kernel.get(), Window::DimY);
}
// Run matrix multiply
diff --git a/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp b/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp
index fd26bb49a7..c64fde050e 100644
--- a/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp
+++ b/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,9 +28,13 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEFuseBatchNormalization::~NEFuseBatchNormalization() = default;
+
NEFuseBatchNormalization::NEFuseBatchNormalization()
: _fuse_bn_kernel()
{
@@ -41,7 +45,8 @@ void NEFuseBatchNormalization::configure(const ITensor *input_weights, const ITe
const ITensor *input_bias, const ITensor *bn_beta, const ITensor *bn_gamma,
float epsilon, FuseBatchNormalizationType fbn_type)
{
- _fuse_bn_kernel.configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+ _fuse_bn_kernel = arm_compute::support::cpp14::make_unique<NEFuseBatchNormalizationKernel>();
+ _fuse_bn_kernel->configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
}
Status NEFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
@@ -54,6 +59,6 @@ Status NEFuseBatchNormalization::validate(const ITensorInfo *input_weights, cons
void NEFuseBatchNormalization::run()
{
- NEScheduler::get().schedule(&_fuse_bn_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fuse_bn_kernel.get(), Window::DimY);
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 4166cff97a..0215098792 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -34,7 +34,12 @@
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
#include <cmath>
@@ -42,6 +47,8 @@ using namespace arm_compute::misc::shape_calculator;
namespace arm_compute
{
+NEGEMM::~NEGEMM() = default;
+
NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(),
_alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
@@ -88,11 +95,13 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
_memory_group.manage(&_tmp_d);
}
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixMultiplyKernel>();
+
// Select between GEMV and GEMM
if(_run_vector_matrix_multiplication)
{
// Configure the matrix multiply kernel
- _mm_kernel.configure(a, b, gemm_output_to_use, alpha, false);
+ _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false);
}
else
{
@@ -124,13 +133,15 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
int k = a->info()->dimension(0);
// Configure interleave kernel
- _interleave_kernel.configure(a, &_tmp_a);
+ _interleave_kernel = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
+ _interleave_kernel->configure(a, &_tmp_a);
// Configure transpose kernel
- _transpose_kernel.configure(b, &_tmp_b);
+ _transpose_kernel = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
+ _transpose_kernel->configure(b, &_tmp_b);
// Configure matrix multiplication kernel
- _mm_kernel.configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
+ _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
// Allocate once the all configure methods have been called
_tmp_a.allocator()->allocate();
@@ -150,7 +161,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
// Configure matrix addition kernel
if(_run_addition)
{
- _ma_kernel.configure(c, d, beta);
+ _ma_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixAdditionKernel>();
+ _ma_kernel->configure(c, d, beta);
}
// Configure activation
@@ -298,16 +310,16 @@ void NEGEMM::run()
if(!_run_vector_matrix_multiplication)
{
// Run interleave kernel
- NEScheduler::get().schedule(&_interleave_kernel, Window::DimY);
+ NEScheduler::get().schedule(_interleave_kernel.get(), Window::DimY);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- NEScheduler::get().schedule(&_transpose_kernel, Window::DimY);
+ NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY);
}
}
- NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
+ NEScheduler::get().schedule(_mm_kernel.get(), _run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
// Run bias addition kernel
if(_run_bias_addition)
@@ -319,7 +331,7 @@ void NEGEMM::run()
// Run matrix addition kernel
if(_run_addition)
{
- NEScheduler::get().schedule(&_ma_kernel, Window::DimY);
+ NEScheduler::get().schedule(_ma_kernel.get(), Window::DimY);
}
// Run activation function
@@ -355,7 +367,7 @@ void NEGEMM::prepare()
}
_tmp_b.allocator()->allocate();
- NEScheduler::get().schedule(&_transpose_kernel, Window::DimY);
+ NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY);
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 834a66a867..3f50f81af2 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -30,6 +30,21 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
+
#include <set>
#include <tuple>
@@ -37,6 +52,7 @@ namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
+NEConvolutionLayerReshapeWeights::~NEConvolutionLayerReshapeWeights() = default;
NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights()
: _weights_reshape_kernel()
{
@@ -52,7 +68,8 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I
const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
const ITensor *biases_to_use = (append_biases) ? biases : nullptr;
- _weights_reshape_kernel.configure(weights, biases_to_use, output);
+ _weights_reshape_kernel = arm_compute::support::cpp14::make_unique<NEWeightsReshapeKernel>();
+ _weights_reshape_kernel->configure(weights, biases_to_use, output);
output->info()->set_quantization_info(weights->info()->quantization_info());
}
@@ -86,9 +103,11 @@ Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co
void NEConvolutionLayerReshapeWeights::run()
{
- NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
+ NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3);
}
+NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default;
+
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager),
_col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false),
@@ -323,7 +342,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_memory_group.manage(&_im2col_output);
// Configure
- _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation);
+ _im2col_kernel = arm_compute::support::cpp14::make_unique<NEIm2ColKernel>();
+ _im2col_kernel->configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation);
// Update GEMM input
gemm_input_to_use = &_im2col_output;
@@ -365,7 +385,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
if(_data_layout == DataLayout::NCHW)
{
// Configure col2im
- _col2im_kernel.configure(gemm_output_to_use, output, Size2D(conv_w, conv_h));
+ _col2im_kernel = arm_compute::support::cpp14::make_unique<NECol2ImKernel>();
+ _col2im_kernel->configure(gemm_output_to_use, output, Size2D(conv_w, conv_h));
}
else
{
@@ -538,7 +559,7 @@ void NEGEMMConvolutionLayer::run()
{
// Run input reshaping
unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
- NEScheduler::get().schedule(&_im2col_kernel, y_dim);
+ NEScheduler::get().schedule(_im2col_kernel.get(), y_dim);
}
// Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions
@@ -558,7 +579,7 @@ void NEGEMMConvolutionLayer::run()
{
if(_data_layout == DataLayout::NCHW)
{
- NEScheduler::get().schedule(&_col2im_kernel, Window::DimY);
+ NEScheduler::get().schedule(_col2im_kernel.get(), Window::DimY);
}
else
{
diff --git a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp
index ad306c3662..70fdcf492d 100644
--- a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp
+++ b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
index 6d52f2b15c..09637dd2d6 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
@@ -26,17 +26,19 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEGEMMLowpAssemblyMatrixMultiplyCore::~NEGEMMLowpAssemblyMatrixMultiplyCore() = default;
NEGEMMLowpAssemblyMatrixMultiplyCore::NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _tmp_a(), _tmp_b()
@@ -137,3 +139,4 @@ void NEGEMMLowpAssemblyMatrixMultiplyCore::run()
NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
}
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 36357dde41..9050427b34 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -34,12 +34,23 @@
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/helpers/AutoConfiguration.h"
+
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+
#include "support/MemorySupport.h"
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
+NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default;
+
NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(),
_mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(),
@@ -80,7 +91,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
_signed_a.allocator()->init(a_to_use->info()->clone()->set_data_type(dt).set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction)));
_memory_group.manage(&_signed_a);
- _convert_to_signed_asymm.configure(a_to_use, &_signed_a);
+ _convert_to_signed_asymm = arm_compute::support::cpp14::make_unique<NEConvertQuantizedSignednessKernel>();
+ _convert_to_signed_asymm->configure(a_to_use, &_signed_a);
a_to_use = &_signed_a;
_a_offset = _signed_a.info()->quantization_info().uniform().offset;
@@ -153,10 +165,12 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
}
// Configure interleave kernel
- _mtx_a_reshape_kernel.configure(a_to_use, &_tmp_a);
+ _mtx_a_reshape_kernel = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
+ _mtx_a_reshape_kernel->configure(a_to_use, &_tmp_a);
// Configure transpose kernel
- _mtx_b_reshape_kernel.configure(b, &_tmp_b);
+ _mtx_b_reshape_kernel = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
+ _mtx_b_reshape_kernel->configure(b, &_tmp_b);
}
if(!_fused_assembly_path)
@@ -176,7 +190,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
}
// Configure Matrix B reduction kernel
- _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, reduction_info);
+ _mtx_b_reduction_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixBReductionKernel>();
+ _mtx_b_reduction_kernel->configure(b, &_vector_sum_col, reduction_info);
}
// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
@@ -188,7 +203,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
_memory_group.manage(&_vector_sum_row);
// Configure matrix A reduction kernel
- _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, reduction_info);
+ _mtx_a_reduction_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _mtx_a_reduction_kernel->configure(a_to_use, &_vector_sum_row, reduction_info);
}
if(_fuse_output_stage)
@@ -196,19 +212,22 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
// Configure matrix multiply kernel
if(!_assembly_path)
{
- _mm_kernel.configure(matrix_a, matrix_b, &_mm_result_s32);
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
+ _mm_kernel->configure(matrix_a, matrix_b, &_mm_result_s32);
}
- _offset_contribution_output_stage_kernel.configure(&_mm_result_s32,
- _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row, c,
- _flip_signedness ? &_signed_output : output,
- a->info()->dimension(0),
- _a_offset, _b_offset, info.gemmlowp_output_stage());
+ _offset_contribution_output_stage_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpOffsetContributionOutputStageKernel>();
+ _offset_contribution_output_stage_kernel->configure(&_mm_result_s32,
+ _a_offset == 0 ? nullptr : &_vector_sum_col,
+ _b_offset == 0 ? nullptr : &_vector_sum_row, c,
+ _flip_signedness ? &_signed_output : output,
+ a->info()->dimension(0),
+ _a_offset, _b_offset, info.gemmlowp_output_stage());
if(_flip_signedness)
{
- _convert_from_signed_asymm.configure(&_signed_output, output);
+ _convert_from_signed_asymm = arm_compute::support::cpp14::make_unique<NEConvertQuantizedSignednessKernel>();
+ _convert_from_signed_asymm->configure(&_signed_output, output);
}
}
else
@@ -216,10 +235,12 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
// Configure matrix multiply kernel
if(!_assembly_path)
{
- _mm_kernel.configure(matrix_a, matrix_b, output);
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
+ _mm_kernel->configure(matrix_a, matrix_b, output);
}
// Configure offset contribution kernel
- _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset);
+ _offset_contribution_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpOffsetContributionKernel>();
+ _offset_contribution_kernel->configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset);
}
// Configure activation
@@ -468,7 +489,7 @@ void NEGEMMLowpMatrixMultiplyCore::run()
// Convert QASYMM8->QASYMM8_SIGNED
if(_flip_signedness)
{
- NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
+ NEScheduler::get().schedule(_convert_to_signed_asymm.get(), Window::DimY);
}
// Run GEMM
@@ -481,15 +502,15 @@ void NEGEMMLowpMatrixMultiplyCore::run()
if(!_run_vector_matrix_multiplication)
{
// Run interleave kernel
- NEScheduler::get().schedule(&_mtx_a_reshape_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
}
}
- NEScheduler::get().schedule(&_mm_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
}
if(!_fused_assembly_path)
@@ -497,31 +518,31 @@ void NEGEMMLowpMatrixMultiplyCore::run()
// Run matrix A reduction kernel only if _b_offset is not equal to 0
if(_b_offset != 0)
{
- NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mtx_a_reduction_kernel.get(), Window::DimX);
}
// Run matrix B reduction kernel only if _a_offset is not equal to 0
if(_a_offset != 0 && !_reshape_b_only_on_first_run)
{
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX);
}
if(_fuse_output_stage)
{
// Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY);
+ NEScheduler::get().schedule(_offset_contribution_output_stage_kernel.get(), Window::DimY);
}
else
{
// Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);
+ NEScheduler::get().schedule(_offset_contribution_kernel.get(), Window::DimY);
}
}
// Convert QASYMM8_SIGNED->QASYMM8
- if(_flip_signedness)
+ if(!_fused_assembly_path && _fuse_output_stage && _flip_signedness)
{
- NEScheduler::get().schedule(&_convert_from_signed_asymm, Window::DimY);
+ NEScheduler::get().schedule(_convert_from_signed_asymm.get(), Window::DimY);
}
// Run fused activation unless already run in the fused assembly
@@ -560,7 +581,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare()
// Run reshape kernel and mark original weights tensor as unused
_tmp_b.allocator()->allocate();
- NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();
@@ -571,7 +592,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare()
if(!_fused_assembly_path && _a_offset != 0 && _reshape_b_only_on_first_run)
{
_vector_sum_col.allocator()->allocate();
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX);
}
_is_prepared = true;
diff --git a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp
index 239a8e668a..9fb8851d7a 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp
@@ -24,15 +24,17 @@
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
+NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default;
+
void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,
int result_offset_after_shift, int min, int max)
{
@@ -46,6 +48,8 @@ Status NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(const ITens
return NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, min, max);
}
+NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default;
+
void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,
int result_offset_after_shift, int min, int max)
{
@@ -59,6 +63,8 @@ Status NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::validate(const ITenso
return NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, min, max);
}
+NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default;
+
void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min, int max)
{
auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>();
@@ -71,6 +77,8 @@ Status NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(const ITens
return NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, min, max);
}
+NEGEMMLowpOutputStage::~NEGEMMLowpOutputStage() = default;
+
void NEGEMMLowpOutputStage::configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info)
{
// Perform validate step
diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
index e807e86299..90cf0bab07 100644
--- a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
+++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
@@ -25,9 +25,9 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGather.cpp b/src/runtime/NEON/functions/NEGather.cpp
index 5238936015..5c0dae1507 100644
--- a/src/runtime/NEON/functions/NEGather.cpp
+++ b/src/runtime/NEON/functions/NEGather.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEGather.h"
-#include "arm_compute/core/NEON/kernels/NEGatherKernel.h"
+#include "src/core/NEON/kernels/NEGatherKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEGaussian3x3.cpp b/src/runtime/NEON/functions/NEGaussian3x3.cpp
index fba49ede2a..5290de1348 100644
--- a/src/runtime/NEON/functions/NEGaussian3x3.cpp
+++ b/src/runtime/NEON/functions/NEGaussian3x3.cpp
@@ -23,18 +23,23 @@
*/
#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEGaussian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEGaussian3x3Kernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp
index 99591f4107..7857710462 100644
--- a/src/runtime/NEON/functions/NEGaussian5x5.cpp
+++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,13 +24,17 @@
#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEGaussian5x5::~NEGaussian5x5() = default;
NEGaussian5x5::NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _tmp(), _border_handler()
@@ -46,21 +50,26 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border
// Manage intermediate buffers
_memory_group.manage(&_tmp);
+ _kernel_hor = arm_compute::support::cpp14::make_unique<NEGaussian5x5HorKernel>();
+ _kernel_vert = arm_compute::support::cpp14::make_unique<NEGaussian5x5VertKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
// Create and configure kernels for the two passes
- _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED);
+ _kernel_hor->configure(input, &_tmp, border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(&_tmp, output, border_mode == BorderMode::UNDEFINED);
_tmp.allocator()->allocate();
- _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void NEGaussian5x5::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
- NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+ NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY);
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
index e4e20e041b..30fe70f0ab 100644
--- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp
+++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
@@ -25,16 +25,18 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "src/core/NEON/kernels/NEScaleKernel.h"
+#include "support/MemorySupport.h"
#include <cstddef>
@@ -45,6 +47,8 @@ NEGaussianPyramid::NEGaussianPyramid()
{
}
+NEGaussianPyramidHalf::~NEGaussianPyramidHalf() = default;
+
NEGaussianPyramidHalf::NEGaussianPyramidHalf() // NOLINT
: _horizontal_border_handler(),
_vertical_border_handler(),
@@ -94,16 +98,20 @@ void NEGaussianPyramidHalf::configure(const ITensor *input, IPyramid *pyramid, B
for(size_t i = 0; i < num_stages; ++i)
{
/* Configure horizontal kernel */
- _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
+ _horizontal_reduction[i] = arm_compute::support::cpp14::make_unique<NEGaussianPyramidHorKernel>();
+ _horizontal_reduction[i]->configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
/* Configure vertical kernel */
- _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
+ _vertical_reduction[i] = arm_compute::support::cpp14::make_unique<NEGaussianPyramidVertKernel>();
+ _vertical_reduction[i]->configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
/* Configure border */
- _horizontal_border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
+ _horizontal_border_handler[i] = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _horizontal_border_handler[i]->configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i]->border_size(), border_mode, PixelValue(constant_border_value));
/* Configure border */
- _vertical_border_handler[i].configure(_tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16));
+ _vertical_border_handler[i] = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _vertical_border_handler[i]->configure(_tmp.get_pyramid_level(i), _vertical_reduction[i]->border_size(), border_mode, PixelValue(pixel_value_u16));
}
_tmp.allocate();
@@ -122,13 +130,15 @@ void NEGaussianPyramidHalf::run()
for(unsigned int i = 0; i < num_levels - 1; ++i)
{
- NEScheduler::get().schedule(&_horizontal_border_handler[i], Window::DimZ);
- NEScheduler::get().schedule(&_horizontal_reduction[i], Window::DimY);
- NEScheduler::get().schedule(&_vertical_border_handler[i], Window::DimZ);
- NEScheduler::get().schedule(&_vertical_reduction[i], Window::DimY);
+ NEScheduler::get().schedule(_horizontal_border_handler[i].get(), Window::DimZ);
+ NEScheduler::get().schedule(_horizontal_reduction[i].get(), Window::DimY);
+ NEScheduler::get().schedule(_vertical_border_handler[i].get(), Window::DimZ);
+ NEScheduler::get().schedule(_vertical_reduction[i].get(), Window::DimY);
}
}
+NEGaussianPyramidOrb::~NEGaussianPyramidOrb() = default;
+
NEGaussianPyramidOrb::NEGaussianPyramidOrb() // NOLINT
: _gaus5x5(),
_scale_nearest()
diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
index 13210a06cd..d9a498e4bd 100644
--- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
+++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
@@ -25,19 +25,22 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager),
- _permute_deltas_kernel(),
+ _permute_deltas(),
_flatten_deltas(),
- _permute_scores_kernel(),
+ _permute_scores(),
_flatten_scores(),
- _compute_anchors_kernel(),
- _bounding_box_kernel(),
- _pad_kernel(),
+ _compute_anchors(),
+ _bounding_box(),
+ _pad(),
_dequantize_anchors(),
_dequantize_deltas(),
_quantize_all_proposals(),
@@ -62,6 +65,8 @@ NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManage
{
}
+NEGenerateProposalsLayer::~NEGenerateProposalsLayer() = default;
+
void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
const GenerateProposalsInfo &info)
{
@@ -85,7 +90,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
// Compute all the anchors
_memory_group.manage(&_all_anchors);
- _compute_anchors_kernel.configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
+ _compute_anchors.configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
_deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info()));
@@ -95,7 +100,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
if(!_is_nhwc)
{
_memory_group.manage(&_deltas_permuted);
- _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_deltas.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
_flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened);
_deltas_permuted.allocator()->allocate();
}
@@ -112,7 +117,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
if(!_is_nhwc)
{
_memory_group.manage(&_scores_permuted);
- _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_scores.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
_flatten_scores.configure(&_scores_permuted, &_scores_flattened);
_scores_permuted.allocator()->allocate();
}
@@ -141,7 +146,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
// Bounding box transform
_memory_group.manage(&_all_proposals);
BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f);
- _bounding_box_kernel.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
+ _bounding_box.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
deltas_to_use->allocator()->allocate();
anchors_to_use->allocator()->allocate();
@@ -197,7 +202,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
_scores_flattened.allocator()->allocate();
// Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
- _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
+ _pad.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
_proposals_4_roi_values.allocator()->allocate();
}
@@ -229,7 +234,7 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
}
TensorInfo all_anchors_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
- ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchors::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())));
TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true);
TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true);
@@ -240,8 +245,8 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
- ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
}
TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
@@ -258,25 +263,25 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
if(is_qasymm8)
{
TensorInfo all_anchors_f32_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&all_anchors_info, &all_anchors_f32_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&all_anchors_info, &all_anchors_f32_info));
TensorInfo deltas_flattened_f32_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
TensorInfo proposals_4_roi_values_f32(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
- BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
+ BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
- ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayerKernel::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized;
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info,
- BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info,
+ BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
}
- ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayerKernel::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } }));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayer::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } }));
if(num_valid_proposals->total_size() > 0)
{
@@ -319,13 +324,13 @@ void NEGenerateProposalsLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
// Compute all the anchors
- NEScheduler::get().schedule(&_compute_anchors_kernel, Window::DimY);
+ _compute_anchors.run();
// Transpose and reshape the inputs
if(!_is_nhwc)
{
- NEScheduler::get().schedule(&_permute_deltas_kernel, Window::DimY);
- NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY);
+ _permute_deltas.run();
+ _permute_scores.run();
}
_flatten_deltas.run();
@@ -333,22 +338,22 @@ void NEGenerateProposalsLayer::run()
if(_is_qasymm8)
{
- NEScheduler::get().schedule(&_dequantize_anchors, Window::DimY);
- NEScheduler::get().schedule(&_dequantize_deltas, Window::DimY);
+ _dequantize_anchors.run();
+ _dequantize_deltas.run();
}
// Build the boxes
- NEScheduler::get().schedule(&_bounding_box_kernel, Window::DimY);
+ _bounding_box.run();
if(_is_qasymm8)
{
- NEScheduler::get().schedule(&_quantize_all_proposals, Window::DimY);
+ _quantize_all_proposals.run();
}
// Non maxima suppression
_cpp_nms.run();
// Add dummy batch indexes
- NEScheduler::get().schedule(&_pad_kernel, Window::DimY);
+ _pad.run();
}
} // namespace arm_compute
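The hunks above show the commit's recurring pattern: NEGenerateProposalsLayer stops validating and scheduling the low-level kernels itself (NEDequantizationLayerKernel, NEBoundingBoxTransformKernel, NEQuantizationLayerKernel, NEPadLayerKernel) and delegates to the corresponding runtime functions in both validate() and run(). A minimal sketch of that shape, using real function-level calls from this library but a hypothetical CompositeFunction that is not part of this commit:

    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/IFunction.h"
    #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
    #include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // Hypothetical composite function illustrating the kernel-to-function delegation;
    // a sketch only, not a class from the library.
    class CompositeFunction : public IFunction
    {
    public:
        void configure(const ITensor *input, ITensor *output)
        {
            // Intermediate tensor allocation and memory-group management are omitted.
            _dequantize.configure(input, &_dequantized);
            _pad.configure(&_dequantized, output, PaddingList{ { 1, 0 } });
        }
        static Status validate(const ITensorInfo *input, const ITensorInfo *dequantized, const ITensorInfo *output)
        {
            // Function-level validate() replaces the old Kernel::validate() calls.
            ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(input, dequantized));
            ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayer::validate(dequantized, output, PaddingList{ { 1, 0 } }));
            return Status{};
        }
        void run() override
        {
            // Each function schedules its own kernel(s); no direct NEScheduler calls here.
            _dequantize.run();
            _pad.run();
        }

    private:
        NEDequantizationLayer _dequantize{};
        NEPadLayer            _pad{};
        Tensor                _dequantized{};
    };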
diff --git a/src/runtime/NEON/functions/NEHOGDescriptor.cpp b/src/runtime/NEON/functions/NEHOGDescriptor.cpp
index 10765f9b86..689e64fae7 100644
--- a/src/runtime/NEON/functions/NEHOGDescriptor.cpp
+++ b/src/runtime/NEON/functions/NEHOGDescriptor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,8 +28,14 @@
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGDescriptor::~NEHOGDescriptor() = default;
NEHOGDescriptor::NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space()
@@ -82,10 +88,12 @@ void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog
_memory_group.manage(&_hog_space);
// Initialise orientation binning kernel
- _orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info());
+ _orient_bin = arm_compute::support::cpp14::make_unique<NEHOGOrientationBinningKernel>();
+ _orient_bin->configure(&_mag, &_phase, &_hog_space, hog->info());
// Initialize HOG norm kernel
- _block_norm.configure(&_hog_space, output, hog->info());
+ _block_norm = arm_compute::support::cpp14::make_unique<NEHOGBlockNormalizationKernel>();
+ _block_norm->configure(&_hog_space, output, hog->info());
// Allocate intermediate tensors
_mag.allocator()->allocate();
@@ -101,8 +109,9 @@ void NEHOGDescriptor::run()
_gradient.run();
// Run orientation binning kernel
- NEScheduler::get().schedule(&_orient_bin, Window::DimY);
+ NEScheduler::get().schedule(_orient_bin.get(), Window::DimY);
// Run block normalization kernel
- NEScheduler::get().schedule(&_block_norm, Window::DimY);
+ NEScheduler::get().schedule(_block_norm.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEHOGDetector.cpp b/src/runtime/NEON/functions/NEHOGDetector.cpp
index 21db5f83b7..8468b75f4e 100644
--- a/src/runtime/NEON/functions/NEHOGDetector.cpp
+++ b/src/runtime/NEON/functions/NEHOGDetector.cpp
@@ -23,10 +23,12 @@
*/
#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
+#include "src/core/NEON/kernels/NEHOGDetectorKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGDetector::~NEHOGDetector() = default;
void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
{
@@ -34,3 +36,4 @@ void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionW
k->configure(input, hog, detection_windows, detection_window_stride, threshold, idx_class);
_kernel = std::move(k);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEHOGGradient.cpp b/src/runtime/NEON/functions/NEHOGGradient.cpp
index 8f3559a7ed..7d794bc1a0 100644
--- a/src/runtime/NEON/functions/NEHOGGradient.cpp
+++ b/src/runtime/NEON/functions/NEHOGGradient.cpp
@@ -23,12 +23,16 @@
*/
#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGGradient::~NEHOGGradient() = default;
NEHOGGradient::NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
@@ -88,3 +92,4 @@ void NEHOGGradient::run()
// Run magnitude/phase kernel
NEScheduler::get().schedule(_mag_phase.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
index e08b699e1c..3e41faad43 100644
--- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
+++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
@@ -28,8 +28,13 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGMultiDetection::~NEHOGMultiDetection() = default;
NEHOGMultiDetection::NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
@@ -262,3 +267,4 @@ void NEHOGMultiDetection::run()
NEScheduler::get().schedule(&_non_maxima_kernel, Window::DimY);
}
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp
index 3c51eb2249..23fcf8c805 100644
--- a/src/runtime/NEON/functions/NEHarrisCorners.cpp
+++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp
@@ -24,8 +24,6 @@
#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Array.h"
@@ -34,12 +32,19 @@
#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
#include "support/MemorySupport.h"
#include <cmath>
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHarrisCorners::~NEHarrisCorners() = default;
NEHarrisCorners::NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
@@ -154,8 +159,10 @@ void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist,
}
// Configure border filling before harris score
- _border_gx.configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value);
- _border_gy.configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value);
+ _border_gx = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_gy = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_gx->configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value);
+ _border_gy->configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value);
// Allocate once all the configure methods have been called
_gx.allocator()->allocate();
@@ -193,8 +200,8 @@ void NEHarrisCorners::run()
_sobel->run();
// Fill border before harris score kernel
- NEScheduler::get().schedule(&_border_gx, Window::DimZ);
- NEScheduler::get().schedule(&_border_gy, Window::DimZ);
+ NEScheduler::get().schedule(_border_gx.get(), Window::DimZ);
+ NEScheduler::get().schedule(_border_gy.get(), Window::DimZ);
// Run harris score kernel
NEScheduler::get().schedule(_harris_score.get(), Window::DimY);
@@ -208,3 +215,4 @@ void NEHarrisCorners::run()
// Run sort & euclidean distance
NEScheduler::get().schedule(&_sort_euclidean, Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEHistogram.cpp b/src/runtime/NEON/functions/NEHistogram.cpp
index 39fad977af..40ea3a16c6 100644
--- a/src/runtime/NEON/functions/NEHistogram.cpp
+++ b/src/runtime/NEON/functions/NEHistogram.cpp
@@ -29,8 +29,12 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHistogram::~NEHistogram() = default;
NEHistogram::NEHistogram()
: _histogram_kernel(), _local_hist(), _window_lut(window_lut_default_size), _local_hist_size(0)
@@ -47,11 +51,13 @@ void NEHistogram::configure(const IImage *input, IDistribution1D *output)
_local_hist.resize(_local_hist_size);
// Configure kernel
- _histogram_kernel.configure(input, output, _local_hist.data(), _window_lut.data());
+ _histogram_kernel = arm_compute::support::cpp14::make_unique<NEHistogramKernel>();
+ _histogram_kernel->configure(input, output, _local_hist.data(), _window_lut.data());
}
void NEHistogram::run()
{
// Calculate histogram of input.
- NEScheduler::get().schedule(&_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEIm2Col.cpp b/src/runtime/NEON/functions/NEIm2Col.cpp
index 99e5d3f1df..bc0c60112e 100644
--- a/src/runtime/NEON/functions/NEIm2Col.cpp
+++ b/src/runtime/NEON/functions/NEIm2Col.cpp
@@ -25,9 +25,13 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEIm2Col::~NEIm2Col() = default;
+
NEIm2Col::NEIm2Col()
: _kernel(), _y_dim(1)
{
@@ -37,7 +41,8 @@ void NEIm2Col::configure(const ITensor *input, ITensor *output, const Size2D &ke
{
_y_dim = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
- _kernel.configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups);
+ _kernel = arm_compute::support::cpp14::make_unique<NEIm2ColKernel>();
+ _kernel->configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups);
}
Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation,
@@ -48,6 +53,6 @@ Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, c
void NEIm2Col::run()
{
- NEScheduler::get().schedule(&_kernel, _y_dim);
+ NEScheduler::get().schedule(_kernel.get(), _y_dim);
}
} // namespace arm_compute
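The new out-of-line `NEIm2Col::~NEIm2Col() = default;` is needed because `_kernel` becomes a std::unique_ptr to a kernel type that the public header now only forward-declares; the unique_ptr deleter can only be instantiated where the complete NEIm2ColKernel definition is visible, i.e. in this .cpp after the src/ kernel header is included. A generic sketch of the idiom, with made-up Foo/FooKernel names and plain std::make_unique rather than the library's own headers:

    // Foo.h -- public header: the kernel type is only forward-declared.
    #include <memory>

    class FooKernel; // full definition lives in a private header

    class Foo
    {
    public:
        Foo();
        ~Foo(); // declared here, defined in Foo.cpp where FooKernel is complete
        void configure(int value);
    private:
        std::unique_ptr<FooKernel> _kernel;
    };

    // Foo.cpp -- implementation file.
    #include "Foo.h"
    #include "FooKernel.h" // complete type is available from here on

    Foo::Foo() = default;
    Foo::~Foo() = default; // unique_ptr<FooKernel>'s deleter is instantiated with a complete type

    void Foo::configure(int value)
    {
        _kernel = std::make_unique<FooKernel>();
        _kernel->configure(value);
    }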
diff --git a/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp b/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp
index 57d01ff2d6..e3fb284796 100644
--- a/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp
@@ -26,9 +26,13 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEInstanceNormalizationLayer::~NEInstanceNormalizationLayer() = default;
+
NEInstanceNormalizationLayer::NEInstanceNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
{
@@ -42,6 +46,8 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl
// Configure Kernels
_is_nchw = data_layout == DataLayout::NCHW;
+ _normalization_kernel = arm_compute::support::cpp14::make_unique<NEInstanceNormalizationLayerKernel>();
+
if(!_is_nchw)
{
_memory_group.manage(&_permuted_input);
@@ -51,7 +57,7 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl
_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
_permuted_input.info()->set_data_layout(DataLayout::NCHW);
- _normalization_kernel.configure(&_permuted_input, &_permuted_output, kernel_descriptor);
+ _normalization_kernel->configure(&_permuted_input, &_permuted_output, kernel_descriptor);
_permuted_output.info()->set_data_layout(DataLayout::NCHW);
_permute_output.configure(&_permuted_output, output != nullptr ? output : input, PermutationVector(2U, 0U, 1U));
@@ -60,7 +66,7 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl
}
else
{
- _normalization_kernel.configure(input, output, kernel_descriptor);
+ _normalization_kernel->configure(input, output, kernel_descriptor);
}
}
@@ -81,7 +87,7 @@ void NEInstanceNormalizationLayer::run()
_permute_input.run();
}
- NEScheduler::get().schedule(&_normalization_kernel, Window::DimZ);
+ NEScheduler::get().schedule(_normalization_kernel.get(), Window::DimZ);
// Permute output
if(!_is_nchw)
diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp
index 8ab6bbd76d..63bcd53373 100644
--- a/src/runtime/NEON/functions/NEIntegralImage.cpp
+++ b/src/runtime/NEON/functions/NEIntegralImage.cpp
@@ -23,18 +23,25 @@
*/
#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h"
-#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEIntegralImageKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEIntegralImage::~NEIntegralImage() = default;
void NEIntegralImage::configure(const ITensor *input, ITensor *output)
{
auto k = arm_compute::support::cpp14::make_unique<NEIntegralImageKernel>();
k->configure(input, output);
_kernel = std::move(k);
- _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
index 04cf3a233a..4a99968cc3 100644
--- a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
+++ b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,9 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -32,6 +35,7 @@ namespace
{
constexpr int max_input_tensor_dim = 3;
} // namespace
+NEL2NormalizeLayer::~NEL2NormalizeLayer() = default;
NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
@@ -46,7 +50,8 @@ void NEL2NormalizeLayer::configure(ITensor *input, ITensor *output, int axis, fl
// Configure Kernels
const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim);
_reduce_func.configure(input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE);
- _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
+ _normalize_kernel = arm_compute::support::cpp14::make_unique<NEL2NormalizeLayerKernel>();
+ _normalize_kernel->configure(input, &_sumsq, output, axis, epsilon);
// Allocate intermediate tensors
_sumsq.allocator()->allocate();
@@ -78,6 +83,6 @@ void NEL2NormalizeLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
_reduce_func.run();
- NEScheduler::get().schedule(&_normalize_kernel, Window::DimY);
+ NEScheduler::get().schedule(_normalize_kernel.get(), Window::DimY);
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp
index dca274acd2..48d69bd6fc 100644
--- a/src/runtime/NEON/functions/NELSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayer.cpp
@@ -29,12 +29,24 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/common/LSTMParams.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::info_helpers;
+NELSTMLayer::~NELSTMLayer() = default;
+
NELSTMLayer::NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
_fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
@@ -575,8 +587,8 @@ Status NELSTMLayer::validate(const ITensorInfo *input,
}
// Validate copy kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&cell_state_tmp, cell_state_out));
- ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(&cell_state_tmp, cell_state_out));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output));
// Validate scratch concatenation
std::vector<const ITensorInfo *> inputs_vector_info_raw;
@@ -646,7 +658,7 @@ void NELSTMLayer::run()
}
_fully_connected_cell_state.run();
- NEScheduler::get().schedule(&_transpose_cell_state, Window::DimY);
+ _transpose_cell_state.run();
_gemm_cell_state1.run();
_accum_cell_state1.run();
if(_is_layer_norm_lstm)
@@ -691,8 +703,8 @@ void NELSTMLayer::run()
}
}
- NEScheduler::get().schedule(&_copy_cell_state, Window::DimY);
- NEScheduler::get().schedule(&_copy_output, Window::DimY);
+ _copy_cell_state.run();
+ _copy_output.run();
_concat_scratch_buffer.run();
}
diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
index 7610d15787..e43929390e 100644
--- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
@@ -26,6 +26,16 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include <cmath>
@@ -42,6 +52,7 @@ const QuantizationInfo qsymm_3(8.f / 32768.f, 0); // qsymm16 with 3 integer bit
const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // qsymm16 with 4 integer bit
const QuantizationInfo qsymm_0(1.f / 32768.f, 0); // qsymm16 with 0 integer bit
} // namespace
+NELSTMLayerQuantized::~NELSTMLayerQuantized() = default;
NELSTMLayerQuantized::NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
index 4f0639b64b..a2651dbf36 100644
--- a/src/runtime/NEON/functions/NELaplacianPyramid.cpp
+++ b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
@@ -29,11 +29,15 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NELaplacianPyramid::~NELaplacianPyramid() = default;
NELaplacianPyramid::NELaplacianPyramid() // NOLINT
: _num_levels(0),
@@ -105,3 +109,4 @@ void NELaplacianPyramid::configure(const ITensor *input, IPyramid *pyramid, ITen
_gauss_pyr.allocate();
_conv_pyr.allocate();
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
index aa5f8a21ca..a50e7ccbef 100644
--- a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
+++ b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h"
+#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/ITensor.h"
@@ -31,7 +32,9 @@
#include <cstddef>
-using namespace arm_compute;
+namespace arm_compute
+{
+NELaplacianReconstruct::~NELaplacianReconstruct() = default;
NELaplacianReconstruct::NELaplacianReconstruct() // NOLINT
: _tmp_pyr(),
@@ -100,3 +103,4 @@ void NELaplacianReconstruct::run()
_depthf.run();
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
index af502be1e9..131ac82ba8 100644
--- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,12 +27,16 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
#include <cmath>
#include <tuple>
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
@@ -70,9 +74,10 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons
shape_gemm.set(1, mat_input_rows);
}
} // namespace
+NELocallyConnectedLayer::~NELocallyConnectedLayer() = default;
NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
+ : _memory_group(std::move(memory_manager)), _input_im2col(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
_is_prepared(false), _original_weights(nullptr)
{
}
@@ -113,10 +118,10 @@ Status NELocallyConnectedLayer::validate(const ITensorInfo *input, const ITensor
TensorInfo input_im2col_reshaped_info(shape_im2col, 1, input->data_type());
TensorInfo gemm_output_info(shape_gemm, 1, input->data_type());
- ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEIm2Col::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias));
ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info));
ARM_COMPUTE_RETURN_ON_ERROR(NELocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECol2Im::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
return Status{};
}
@@ -154,10 +159,12 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei
_memory_group.manage(&_gemm_output);
// Configure kernels
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
- _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
- _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
- _output_col2im_kernel.configure(&_gemm_output, output, Size2D(conv_w, conv_h));
+ _input_im2col.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
+ _weights_reshape_kernel = arm_compute::support::cpp14::make_unique<NEWeightsReshapeKernel>();
+ _weights_reshape_kernel->configure(weights, biases, &_weights_reshaped);
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NELocallyConnectedMatrixMultiplyKernel>();
+ _mm_kernel->configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
+ _output_col2im.configure(&_gemm_output, output, Size2D(conv_w, conv_h));
// Allocate intermediate tensors
_input_im2col_reshaped.allocator()->allocate();
@@ -171,13 +178,13 @@ void NELocallyConnectedLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
// Run input reshaping
- NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY);
+ _input_im2col.run();
// Runs GEMM on reshaped matrices
- NEScheduler::get().schedule(&_mm_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mm_kernel.get(), Window::DimX);
// Reshape output matrix
- NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY);
+ _output_col2im.run();
}
void NELocallyConnectedLayer::prepare()
@@ -188,9 +195,10 @@ void NELocallyConnectedLayer::prepare()
// Run weights reshaping and mark original weights tensor as unused
_weights_reshaped.allocator()->allocate();
- NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
+ NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3);
_original_weights->mark_as_unused();
_is_prepared = true;
}
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEMagnitude.cpp b/src/runtime/NEON/functions/NEMagnitude.cpp
index 5ca672e1d6..06ed8d46c9 100644
--- a/src/runtime/NEON/functions/NEMagnitude.cpp
+++ b/src/runtime/NEON/functions/NEMagnitude.cpp
@@ -23,13 +23,15 @@
*/
#include "arm_compute/runtime/NEON/functions/NEMagnitude.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEMagnitude::~NEMagnitude() = default;
void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type)
{
@@ -46,3 +48,4 @@ void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITenso
_kernel = std::move(k);
}
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp b/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp
index 9d3f34fba4..e8c9d09d95 100644
--- a/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp
@@ -25,9 +25,14 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEMaxUnpoolingLayer::~NEMaxUnpoolingLayer() = default;
+
NEMaxUnpoolingLayer::NEMaxUnpoolingLayer()
: _memset_kernel(), _unpooling_layer_kernel()
@@ -37,8 +42,10 @@ NEMaxUnpoolingLayer::NEMaxUnpoolingLayer()
void NEMaxUnpoolingLayer::configure(ITensor *input, ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info)
{
const PixelValue zero_value(0.f);
- _memset_kernel.configure(output, zero_value);
- _unpooling_layer_kernel.configure(input, indices, output, pool_info);
+ _memset_kernel = arm_compute::support::cpp14::make_unique<NEMemsetKernel>();
+ _unpooling_layer_kernel = arm_compute::support::cpp14::make_unique<NEMaxUnpoolingLayerKernel>();
+ _memset_kernel->configure(output, zero_value);
+ _unpooling_layer_kernel->configure(input, indices, output, pool_info);
}
Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
@@ -48,7 +55,7 @@ Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo
void NEMaxUnpoolingLayer::run()
{
- NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
- NEScheduler::get().schedule(&_unpooling_layer_kernel, Window::DimY);
+ NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY);
+ NEScheduler::get().schedule(_unpooling_layer_kernel.get(), Window::DimY);
}
} /* namespace arm_compute */
diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp
index 57363f05ff..e073420114 100644
--- a/src/runtime/NEON/functions/NEMeanStdDev.cpp
+++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,13 @@
#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEMeanStdDev::~NEMeanStdDev() = default;
NEMeanStdDev::NEMeanStdDev()
: _mean_stddev_kernel(), _fill_border_kernel(), _global_sum(0), _global_sum_squared(0)
@@ -34,8 +39,11 @@ NEMeanStdDev::NEMeanStdDev()
void NEMeanStdDev::configure(IImage *input, float *mean, float *stddev)
{
- _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
- _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ _mean_stddev_kernel = arm_compute::support::cpp14::make_unique<NEMeanStdDevKernel>();
+ _fill_border_kernel = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
+ _mean_stddev_kernel->configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
+ _fill_border_kernel->configure(input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
}
void NEMeanStdDev::run()
@@ -43,6 +51,7 @@ void NEMeanStdDev::run()
_global_sum = 0;
_global_sum_squared = 0;
- NEScheduler::get().schedule(&_fill_border_kernel, Window::DimZ);
- NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimZ);
+ NEScheduler::get().schedule(_mean_stddev_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp b/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp
index a88732b67d..d128c4456a 100644
--- a/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp
@@ -23,11 +23,13 @@
*/
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
+NEMeanStdDevNormalizationLayer::~NEMeanStdDevNormalizationLayer() = default;
+
void NEMeanStdDevNormalizationLayer::configure(ITensor *input, ITensor *output, float epsilon)
{
auto k = arm_compute::support::cpp14::make_unique<NEMeanStdDevNormalizationKernel>();
diff --git a/src/runtime/NEON/functions/NEMedian3x3.cpp b/src/runtime/NEON/functions/NEMedian3x3.cpp
index 2bbe8d39ae..b7b7c2cb47 100644
--- a/src/runtime/NEON/functions/NEMedian3x3.cpp
+++ b/src/runtime/NEON/functions/NEMedian3x3.cpp
@@ -23,18 +23,23 @@
*/
#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h"
-#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEMedian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEMedian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEMedian3x3Kernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
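NEMedian3x3, like the other simple-function wrappers in this patch, now builds its NEFillBorderKernel explicitly and moves it into the base class's `_border_handler`, which becomes an owning pointer as well. Assuming the base class keeps scheduling the stored kernels the same way the explicit run() methods elsewhere in this diff do (an assumption; the base class is not shown here), its run() would reduce to roughly:

    // Assumed shape of the wrapper's run(), with _kernel and _border_handler
    // held as std::unique_ptr<INEKernel> (not shown in this diff):
    void run() // override in the simple-function base class
    {
        NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); // fill borders first
        NEScheduler::get().schedule(_kernel.get(), Window::DimY);         // then the filter kernel
    }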
diff --git a/src/runtime/NEON/functions/NEMinMaxLocation.cpp b/src/runtime/NEON/functions/NEMinMaxLocation.cpp
index ca63937770..3c2219ca07 100644
--- a/src/runtime/NEON/functions/NEMinMaxLocation.cpp
+++ b/src/runtime/NEON/functions/NEMinMaxLocation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,12 @@
#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEMinMaxLocation::~NEMinMaxLocation() = default;
NEMinMaxLocation::NEMinMaxLocation()
: _min_max(), _min_max_loc()
@@ -34,17 +38,21 @@ NEMinMaxLocation::NEMinMaxLocation()
void NEMinMaxLocation::configure(const IImage *input, void *min, void *max, ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count)
{
- _min_max.configure(input, min, max);
- _min_max_loc.configure(input, min, max, min_loc, max_loc, min_count, max_count);
+ _min_max = arm_compute::support::cpp14::make_unique<NEMinMaxKernel>();
+ _min_max->configure(input, min, max);
+
+ _min_max_loc = arm_compute::support::cpp14::make_unique<NEMinMaxLocationKernel>();
+ _min_max_loc->configure(input, min, max, min_loc, max_loc, min_count, max_count);
}
void NEMinMaxLocation::run()
{
- _min_max.reset();
+ _min_max->reset();
/* Run min max kernel */
- NEScheduler::get().schedule(&_min_max, Window::DimY);
+ NEScheduler::get().schedule(_min_max.get(), Window::DimY);
/* Run min max location */
- NEScheduler::get().schedule(&_min_max_loc, Window::DimY);
+ NEScheduler::get().schedule(_min_max_loc.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NENonLinearFilter.cpp b/src/runtime/NEON/functions/NENonLinearFilter.cpp
index b7c72acb9a..4d8fd00cbd 100644
--- a/src/runtime/NEON/functions/NENonLinearFilter.cpp
+++ b/src/runtime/NEON/functions/NENonLinearFilter.cpp
@@ -23,14 +23,15 @@
*/
#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h"
-#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NENonLinearFilterKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
BorderMode border_mode,
uint8_t constant_border_value)
@@ -38,5 +39,9 @@ void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilt
auto k = arm_compute::support::cpp14::make_unique<NENonLinearFilterKernel>();
k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
index 4d9edf7fc7..b8f5c251b7 100644
--- a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
+++ b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
@@ -23,25 +23,29 @@
*/
#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
-#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NENonMaximaSuppression3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode)
{
auto k = arm_compute::support::cpp14::make_unique<NENonMaximaSuppression3x3Kernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
if(border_mode != BorderMode::UNDEFINED)
{
- _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast<float>(0.f));
+ b->configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast<float>(0.f));
}
else
{
- _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast<float>(0.f));
+ b->configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast<float>(0.f));
}
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp
index 10ee938335..dfc73b2a57 100644
--- a/src/runtime/NEON/functions/NENormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp
@@ -29,9 +29,13 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NENormalizationLayer::~NENormalizationLayer() = default;
+
NENormalizationLayer::NENormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_f(), _input_squared()
{
@@ -48,7 +52,8 @@ void NENormalizationLayer::configure(const ITensor *input, ITensor *output, cons
_memory_group.manage(&_input_squared);
// Configure kernels
- _norm_kernel.configure(input, &_input_squared, output, norm_info);
+ _norm_kernel = arm_compute::support::cpp14::make_unique<NENormalizationLayerKernel>();
+ _norm_kernel->configure(input, &_input_squared, output, norm_info);
_multiply_f.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
// Allocate the tensor once the configure methods have been called
@@ -70,6 +75,6 @@ void NENormalizationLayer::run()
{
MemoryGroupResourceScope scope_mg(_memory_group);
_multiply_f.run();
- NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
+ NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY);
}
}
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEOpticalFlow.cpp b/src/runtime/NEON/functions/NEOpticalFlow.cpp
index c9e07483e6..565346bfce 100644
--- a/src/runtime/NEON/functions/NEOpticalFlow.cpp
+++ b/src/runtime/NEON/functions/NEOpticalFlow.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -33,8 +32,13 @@
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NELKTrackerKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEOpticalFlow::~NEOpticalFlow() = default;
NEOpticalFlow::NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
@@ -110,11 +114,12 @@ void NEOpticalFlow::configure(const Pyramid *old_pyramid, const Pyramid *new_pyr
_func_scharr[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value);
// Init Lucas-Kanade kernel
- _kernel_tracker[i].configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i],
- old_points, new_points_estimates, new_points,
- &_old_points_internal, &_new_points_internal,
- termination, use_initial_estimate, epsilon, num_iterations, window_dimension,
- i, _num_levels, pyr_scale);
+ _kernel_tracker[i] = arm_compute::support::cpp14::make_unique<NELKTrackerKernel>();
+ _kernel_tracker[i]->configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i],
+ old_points, new_points_estimates, new_points,
+ &_old_points_internal, &_new_points_internal,
+ termination, use_initial_estimate, epsilon, num_iterations, window_dimension,
+ i, _num_levels, pyr_scale);
_scharr_gx[i].allocator()->allocate();
_scharr_gy[i].allocator()->allocate();
@@ -133,6 +138,7 @@ void NEOpticalFlow::run()
_func_scharr[level - 1].run();
// Run Lucas-Kanade kernel
- NEScheduler::get().schedule(&_kernel_tracker[level - 1], Window::DimX);
+ NEScheduler::get().schedule(_kernel_tracker[level - 1].get(), Window::DimX);
}
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp
index f9393a4d92..00a1a4257a 100644
--- a/src/runtime/NEON/functions/NEPReluLayer.cpp
+++ b/src/runtime/NEON/functions/NEPReluLayer.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp
index 03c597a3bf..92659f39a2 100644
--- a/src/runtime/NEON/functions/NEPadLayer.cpp
+++ b/src/runtime/NEON/functions/NEPadLayer.cpp
@@ -27,7 +27,10 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -47,6 +50,8 @@ uint32_t last_padding_dimension(const PaddingList &padding)
}
} // namespace
+NEPadLayer::~NEPadLayer() = default;
+
NEPadLayer::NEPadLayer()
: _copy_kernel(), _pad_kernel(), _mode(), _padding(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results()
{
@@ -54,7 +59,8 @@ NEPadLayer::NEPadLayer()
void NEPadLayer::configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value)
{
- _pad_kernel.configure(input, output, padding, constant_value, PaddingMode::CONSTANT);
+ _pad_kernel = arm_compute::support::cpp14::make_unique<NEPadLayerKernel>();
+ _pad_kernel->configure(input, output, padding, constant_value, PaddingMode::CONSTANT);
}
void NEPadLayer::configure_reflect_symmetric_mode(ITensor *input, ITensor *output)
@@ -195,7 +201,8 @@ void NEPadLayer::configure(ITensor *input, ITensor *output, const PaddingList &p
else
{
// Copy the input to the whole output if no padding is applied
- _copy_kernel.configure(input, output);
+ _copy_kernel = arm_compute::support::cpp14::make_unique<NECopyKernel>();
+ _copy_kernel->configure(input, output);
}
}
@@ -251,7 +258,7 @@ void NEPadLayer::run()
{
case PaddingMode::CONSTANT:
{
- NEScheduler::get().schedule(&_pad_kernel, Window::DimZ);
+ NEScheduler::get().schedule(_pad_kernel.get(), Window::DimZ);
break;
}
case PaddingMode::REFLECT:
@@ -280,7 +287,7 @@ void NEPadLayer::run()
}
else
{
- NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
+ NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY);
}
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEPermute.cpp b/src/runtime/NEON/functions/NEPermute.cpp
index 698add86b9..d2a115fdc8 100644
--- a/src/runtime/NEON/functions/NEPermute.cpp
+++ b/src/runtime/NEON/functions/NEPermute.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
+#include "src/core/NEON/kernels/NEPermuteKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEPhase.cpp b/src/runtime/NEON/functions/NEPhase.cpp
index 85779611cd..3b6182a269 100644
--- a/src/runtime/NEON/functions/NEPhase.cpp
+++ b/src/runtime/NEON/functions/NEPhase.cpp
@@ -23,13 +23,13 @@
*/
#include "arm_compute/runtime/NEON/functions/NEPhase.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type)
{
if(phase_type == PhaseType::UNSIGNED)
@@ -45,3 +45,4 @@ void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *o
_kernel = std::move(k);
}
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp
index 4208878b75..f7f4437554 100644
--- a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp
+++ b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
+#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index 81bd00d44d..12ac8d6d7d 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -25,8 +25,13 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEPoolingLayerKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEPoolingLayer::~NEPoolingLayer() = default;
NEPoolingLayer::NEPoolingLayer()
: _pooling_layer_kernel(), _border_handler(), _is_global_pooling_layer(false), _data_layout(DataLayout::NCHW)
@@ -42,7 +47,8 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
_data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout;
// Configure pooling kernel
- _pooling_layer_kernel.configure(input, output, pool_info, indices);
+ _pooling_layer_kernel = arm_compute::support::cpp14::make_unique<NEPoolingLayerKernel>();
+ _pooling_layer_kernel->configure(input, output, pool_info, indices);
switch(_data_layout)
{
@@ -55,7 +61,8 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
{
zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
}
- _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value);
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_handler->configure(input, _pooling_layer_kernel->border_size(), border_mode, zero_value);
break;
}
case DataLayout::NHWC:
@@ -76,16 +83,18 @@ void NEPoolingLayer::run()
{
case DataLayout::NCHW:
// Fill border
- NEScheduler::get().schedule(&_border_handler, Window::DimY);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimY);
// Run pooling layer
- NEScheduler::get().schedule(&_pooling_layer_kernel, _is_global_pooling_layer ? Window::DimZ : Window::DimY);
+ NEScheduler::get().schedule(_pooling_layer_kernel.get(), _is_global_pooling_layer ? Window::DimZ : Window::DimY);
break;
case DataLayout::NHWC:
// Run pooling layer
- NEScheduler::get().schedule(&_pooling_layer_kernel, Window::DimX);
+ NEScheduler::get().schedule(_pooling_layer_kernel.get(), Window::DimX);
break;
default:
ARM_COMPUTE_ERROR("Data layout not supported");
}
}
+
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp
index bcf6bef9c7..bfa06da04e 100644
--- a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp
+++ b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index e41962451c..1013730235 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -30,7 +30,16 @@
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -47,6 +56,31 @@ Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info, const ITensorInfo *mm
}
} // namespace
+Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
+{
+ // Output quantization scale will be different, but ignored here
+ // since it will be configured at configure() stage.
+ const TensorInfo out
+ {
+ in
+ };
+ return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
+}
+
+void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in)
+{
+ ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
+
+ Tensor &out = get_layer_norm_output(g);
+ _memory_group.manage(&out);
+ out.allocator()->init(*(in->info()));
+
+ get_layer_norm(g) = arm_compute::support::cpp14::make_unique<NEQLSTMLayerNormalizationKernel>();
+ get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
+}
+
+NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default;
+
Status NEQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst)
{
ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported);
@@ -77,7 +111,21 @@ void NEQLSTMLayer::TensorCopyKernel::run()
input_iter, output_iter);
}
+NEQLSTMLayer::~NEQLSTMLayer() = default;
+
NEQLSTMLayer::NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(),
+ _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(),
+ _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(),
+ _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), _pixelwise_mul_cell_to_forget(),
+ _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(),
+ _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(),
+ _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(),
+ _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(),
+ _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(),
+ _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(),
+ _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(),
+ _layer_norm_output()
{
_memory_group = MemoryGroup(std::move(memory_manager));
}
@@ -178,18 +226,29 @@ void NEQLSTMLayer::configure(const ITensor *input,
_input_to_input_weights = lstm_params.input_to_input_weights();
_recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
- _input_to_input_reduction.configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_input_reduction.configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_input_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_input_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_input_reduction->configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
}
- _input_to_forget_reduction.configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_forget_reduction.configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_cell_reduction.configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_cell_reduction.configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_output_reduction.configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_output_reduction.configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+
+ _input_to_forget_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_forget_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_cell_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_cell_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_output_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_output_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+
+ _input_to_forget_reduction->configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_forget_reduction->configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_cell_reduction->configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_cell_reduction->configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_output_reduction->configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_output_reduction->configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
if(_has_projection)
{
- _projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+ _projection_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _projection_reduction->configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
if(_projection_bias != nullptr)
{
_projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
@@ -878,7 +937,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out);
}
- ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output));
return Status{};
}
@@ -906,7 +965,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Forget), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Forget).get(), Window::DimY);
}
_forget_gate_sigmoid.run();
@@ -921,7 +980,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Cell), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Cell).get(), Window::DimY);
}
_cell_gate_tanh.run();
@@ -948,7 +1007,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Input), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Input).get(), Window::DimY);
}
_input_gate_sigmoid.run();
@@ -979,7 +1038,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Output), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Output).get(), Window::DimY);
}
_output_gate_sigmoid.run();
@@ -1021,7 +1080,7 @@ void NEQLSTMLayer::run()
}
// Copy output_state_out to output
- NEScheduler::get().schedule(&_copy_output, Window::DimY);
+ _copy_output.run();
}
void NEQLSTMLayer::prepare()
@@ -1051,8 +1110,8 @@ void NEQLSTMLayer::prepare()
{
_input_to_input_eff_bias.allocator()->allocate();
_recurrent_to_input_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_input_to_input_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_input_reduction, Window::DimY);
+ NEScheduler::get().schedule(_input_to_input_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_input_reduction.get(), Window::DimY);
_input_to_input_weights_transposed.allocator()->allocate();
_recurrent_to_input_weights_transposed.allocator()->allocate();
@@ -1067,17 +1126,17 @@ void NEQLSTMLayer::prepare()
_recurrent_to_cell_eff_bias.allocator()->allocate();
_input_to_output_eff_bias.allocator()->allocate();
_recurrent_to_output_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_input_to_forget_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_forget_reduction, Window::DimY);
- NEScheduler::get().schedule(&_input_to_cell_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_cell_reduction, Window::DimY);
- NEScheduler::get().schedule(&_input_to_output_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_output_reduction, Window::DimY);
+ NEScheduler::get().schedule(_input_to_forget_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_forget_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_input_to_cell_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_cell_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_input_to_output_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_output_reduction.get(), Window::DimY);
if(_has_projection)
{
_projection_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
+ NEScheduler::get().schedule(_projection_reduction.get(), Window::DimY);
if(_projection_bias != nullptr)
{
_projection_bias_add.run();
@@ -1106,5 +1165,4 @@ void NEQLSTMLayer::prepare()
_is_prepared = true;
}
}
-
} // namespace arm_compute
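
The conversion applied here is the same one repeated for every function below: kernels that used to be value members are now owned through std::unique_ptr, allocated with arm_compute::support::cpp14::make_unique inside configure(), and handed to the scheduler as non-owning raw pointers via .get(). A minimal sketch of the shape a converted function ends up with follows; NEExampleFunction and NEExampleKernel are placeholder names rather than types from this patch, and a header declaring _kernel as std::unique_ptr<NEExampleKernel> is assumed.

// Sketch only: NEExampleFunction / NEExampleKernel are hypothetical names.
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEExampleKernel.h" // hypothetical kernel header, now under src/
#include "support/MemorySupport.h"

namespace arm_compute
{
NEExampleFunction::~NEExampleFunction() = default; // defaulted where the kernel type is complete

void NEExampleFunction::configure(ITensor *input, ITensor *output)
{
    // Heap-allocate the kernel on configure instead of holding it by value.
    _kernel = arm_compute::support::cpp14::make_unique<NEExampleKernel>();
    _kernel->configure(input, output);
}

void NEExampleFunction::run()
{
    // The scheduler only borrows the kernel, so a raw pointer is passed.
    NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
} // namespace arm_compute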
diff --git a/src/runtime/NEON/functions/NEQuantizationLayer.cpp b/src/runtime/NEON/functions/NEQuantizationLayer.cpp
index c042705a72..a20ffb8858 100644
--- a/src/runtime/NEON/functions/NEQuantizationLayer.cpp
+++ b/src/runtime/NEON/functions/NEQuantizationLayer.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp
index b7415bd44c..a8e10482a7 100644
--- a/src/runtime/NEON/functions/NERNNLayer.cpp
+++ b/src/runtime/NEON/functions/NERNNLayer.cpp
@@ -30,9 +30,24 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NERNNLayer::~NERNNLayer() = default;
+
NERNNLayer::NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_f(), _activation(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
_is_prepared(false)
@@ -99,7 +114,8 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
_activation.configure(&_add_output, hidden_state, info);
_add_output.allocator()->allocate();
- _copy_kernel.configure(hidden_state, output);
+ _copy_kernel = arm_compute::support::cpp14::make_unique<NECopyKernel>();
+ _copy_kernel->configure(hidden_state, output);
}
void NERNNLayer::run()
@@ -116,7 +132,7 @@ void NERNNLayer::run()
_activation.run();
// copy hidden out to output
- NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
+ NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY);
}
void NERNNLayer::prepare()
diff --git a/src/runtime/NEON/functions/NEROIAlignLayer.cpp b/src/runtime/NEON/functions/NEROIAlignLayer.cpp
index a3b116a55e..a046140551 100644
--- a/src/runtime/NEON/functions/NEROIAlignLayer.cpp
+++ b/src/runtime/NEON/functions/NEROIAlignLayer.cpp
@@ -23,7 +23,8 @@
*/
#include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h"
-#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEROIPoolingLayer.cpp b/src/runtime/NEON/functions/NEROIPoolingLayer.cpp
index 4aecadbc09..8bcf152881 100644
--- a/src/runtime/NEON/functions/NEROIPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEROIPoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,11 +24,14 @@
#include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEROIPoolingLayer::~NEROIPoolingLayer() = default;
+
NEROIPoolingLayer::NEROIPoolingLayer()
: _roi_kernel()
{
@@ -36,11 +39,12 @@ NEROIPoolingLayer::NEROIPoolingLayer()
void NEROIPoolingLayer::configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info)
{
- _roi_kernel.configure(input, rois, output, pool_info);
+ _roi_kernel = arm_compute::support::cpp14::make_unique<NEROIPoolingLayerKernel>();
+ _roi_kernel->configure(input, rois, output, pool_info);
}
void NEROIPoolingLayer::run()
{
- NEScheduler::get().schedule(&_roi_kernel, Window::DimX);
+ NEScheduler::get().schedule(_roi_kernel.get(), Window::DimX);
}
} // namespace arm_compute
\ No newline at end of file
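
The one-line destructor added above (NEROIPoolingLayer::~NEROIPoolingLayer() = default;) recurs throughout the patch because the public headers presumably now only forward-declare the kernel and hold it through std::unique_ptr: the enclosing class destructor must then be defined in a translation unit where the kernel type is complete. A self-contained toy with placeholder names shows the constraint:

// Toy illustration with placeholder names (ExampleFunction / ExampleKernel are not library types).
#include <memory>

// --- what the public function header can now get away with: a forward declaration ---
class ExampleKernel;

class ExampleFunction
{
public:
    ExampleFunction();
    ~ExampleFunction(); // only declared here; defaulted below, once ExampleKernel is complete

private:
    std::unique_ptr<ExampleKernel> _kernel;
};

// --- what the .cpp sees after including the kernel header from src/ ---
class ExampleKernel
{
public:
    void configure() {}
};

ExampleFunction::ExampleFunction() : _kernel(std::make_unique<ExampleKernel>()) {}
ExampleFunction::~ExampleFunction() = default; // unique_ptr's deleter is instantiated here

int main()
{
    ExampleFunction f; // in a real header/source split, an implicitly inline destructor
                       // would instead force every includer to see the full kernel type
}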
diff --git a/src/runtime/NEON/functions/NERange.cpp b/src/runtime/NEON/functions/NERange.cpp
index 138b458fab..ba166b2d58 100644
--- a/src/runtime/NEON/functions/NERange.cpp
+++ b/src/runtime/NEON/functions/NERange.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,13 @@
#include "arm_compute/runtime/NEON/functions/NERange.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NERangeKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NERange::~NERange() = default;
+
NERange::NERange()
: _kernel()
{
@@ -34,7 +38,8 @@ NERange::NERange()
void NERange::configure(ITensor *output, const float start, const float end, const float step)
{
- _kernel.configure(output, start, end, step);
+ _kernel = arm_compute::support::cpp14::make_unique<NERangeKernel>();
+ _kernel->configure(output, start, end, step);
}
Status NERange::validate(const ITensorInfo *output, const float start, const float end, const float step)
@@ -44,6 +49,6 @@ Status NERange::validate(const ITensorInfo *output, const float start, const flo
void NERange::run()
{
- NEScheduler::get().schedule(&_kernel, Window::DimX);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimX);
}
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp
index c3c5529c09..b50a925f44 100644
--- a/src/runtime/NEON/functions/NEReduceMean.cpp
+++ b/src/runtime/NEON/functions/NEReduceMean.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
@@ -96,6 +97,8 @@ Status validate_config(const ITensorInfo *input, const Coordinates &reduction_ax
}
} // namespace
+NEReduceMean::~NEReduceMean() = default;
+
NEReduceMean::NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _dequant(), _requant(), _reduction_ops(), _keep_dims(), _do_requant(), _input_no_quant(),
_output_no_quant()
diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 4938a56b3f..463b65ec28 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp
@@ -26,7 +26,9 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -54,6 +56,8 @@ size_t reduction_window_split_dimension(unsigned int axis)
}
} // namespace
+NEReductionOperation::~NEReductionOperation() = default;
+
NEReductionOperation::NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _reduction_kernel(), _reshape(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false)
{
@@ -125,7 +129,8 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i
ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperation::validate(input->info(), output->info(), axis, op, keep_dims));
// Configure reduction kernel
- _reduction_kernel.configure(input, output_internal, axis, op);
+ _reduction_kernel = arm_compute::support::cpp14::make_unique<NEReductionOperationKernel>();
+ _reduction_kernel->configure(input, output_internal, axis, op);
_window_split = reduction_window_split_dimension(axis);
_reduction_axis = axis;
@@ -139,7 +144,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i
void NEReductionOperation::run()
{
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_reduction_kernel, _window_split);
+ NEScheduler::get().schedule(_reduction_kernel.get(), _window_split);
if(_is_reshape_required)
{
_reshape.run();
diff --git a/src/runtime/NEON/functions/NERemap.cpp b/src/runtime/NEON/functions/NERemap.cpp
index d4e7f838c6..9276d49cf5 100644
--- a/src/runtime/NEON/functions/NERemap.cpp
+++ b/src/runtime/NEON/functions/NERemap.cpp
@@ -25,17 +25,18 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NERemapKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
@@ -45,9 +46,11 @@ void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map
ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported");
auto k = arm_compute::support::cpp14::make_unique<NERemapKernel>();
-
k->configure(input, map_x, map_y, output, policy);
-
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
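
The border-handling tail added to NERemap is the same one that reappears in NEScharr3x3, NESobel3x3, NEWarpAffine and NEWarpPerspective below: the fill-border kernel is now built by the function itself and its ownership transferred to _border_handler, which is presumably a smart-pointer member of the simple-function base class rather than a by-value NEFillBorderKernel. Reproduced with comments, the shared configure() tail reads:

// Shared tail of configure() in the simple functions touched by this patch.
// Assumes _kernel (the main kernel) has already been configured and that _border_handler
// is a smart-pointer member of the base class, as the std::move assignment implies.
auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
b->configure(input,                             // tensor whose border must be filled
             _kernel->border_size(),            // border required by the freshly configured kernel
             border_mode,                        // UNDEFINED / CONSTANT / REPLICATE
             PixelValue(constant_border_value)); // value used for CONSTANT borders
_border_handler = std::move(b);                 // the function takes ownership of the border kernel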
diff --git a/src/runtime/NEON/functions/NEReorgLayer.cpp b/src/runtime/NEON/functions/NEReorgLayer.cpp
index dfe002a503..77ec7fbfb1 100644
--- a/src/runtime/NEON/functions/NEReorgLayer.cpp
+++ b/src/runtime/NEON/functions/NEReorgLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEReorgLayer.h"
-#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h"
+#include "src/core/NEON/kernels/NEReorgLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp
index c1c88c1c7a..915d5d408f 100644
--- a/src/runtime/NEON/functions/NEReshapeLayer.cpp
+++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp
@@ -23,10 +23,10 @@
*/
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Types.h"
+#include "src/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -35,6 +35,8 @@ namespace arm_compute
{
namespace experimental
{
+NEReshape::~NEReshape() = default;
+
void NEReshape::configure(const ITensorInfo *input, ITensorInfo *output)
{
auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>();
diff --git a/src/runtime/NEON/functions/NEReverse.cpp b/src/runtime/NEON/functions/NEReverse.cpp
index c60c84e897..3ed0688386 100644
--- a/src/runtime/NEON/functions/NEReverse.cpp
+++ b/src/runtime/NEON/functions/NEReverse.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-#include "arm_compute/core/NEON/kernels/NEReverseKernel.h"
+#include "src/core/NEON/kernels/NEReverseKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index bbf8343c2b..0290fe5a01 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -32,6 +32,7 @@
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEScaleKernel.h"
#include "src/core/utils/ScaleUtils.h"
diff --git a/src/runtime/NEON/functions/NEScharr3x3.cpp b/src/runtime/NEON/functions/NEScharr3x3.cpp
index bf787e1440..cea0eefdb0 100644
--- a/src/runtime/NEON/functions/NEScharr3x3.cpp
+++ b/src/runtime/NEON/functions/NEScharr3x3.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
-#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEScharr3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -36,5 +37,8 @@ void NEScharr3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y
auto k = arm_compute::support::cpp14::make_unique<NEScharr3x3Kernel>();
k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
diff --git a/src/runtime/NEON/functions/NESelect.cpp b/src/runtime/NEON/functions/NESelect.cpp
index 8def123c5d..0d1f490767 100644
--- a/src/runtime/NEON/functions/NESelect.cpp
+++ b/src/runtime/NEON/functions/NESelect.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/NEON/functions/NESelect.h"
-#include "arm_compute/core/NEON/kernels/NESelectKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NESelectKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NESlice.cpp b/src/runtime/NEON/functions/NESlice.cpp
index 2bacf2ee2a..dd56eaba8b 100644
--- a/src/runtime/NEON/functions/NESlice.cpp
+++ b/src/runtime/NEON/functions/NESlice.cpp
@@ -24,10 +24,10 @@
#include "arm_compute/runtime/NEON/functions/NESlice.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "src/core/NEON/kernels/NEStridedSliceKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/NEON/functions/NESobel3x3.cpp b/src/runtime/NEON/functions/NESobel3x3.cpp
index cfd68d70af..38d2dc227e 100644
--- a/src/runtime/NEON/functions/NESobel3x3.cpp
+++ b/src/runtime/NEON/functions/NESobel3x3.cpp
@@ -23,18 +23,23 @@
*/
#include "arm_compute/runtime/NEON/functions/NESobel3x3.h"
-#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NESobel3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NESobel3x3Kernel>();
k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp
index 092c510bcf..e631fb3ed7 100644
--- a/src/runtime/NEON/functions/NESobel5x5.cpp
+++ b/src/runtime/NEON/functions/NESobel5x5.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,8 +29,13 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NESobel5x5::~NESobel5x5() = default;
NESobel5x5::NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
@@ -46,14 +51,18 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16);
+ _sobel_hor = arm_compute::support::cpp14::make_unique<NESobel5x5HorKernel>();
+ _sobel_vert = arm_compute::support::cpp14::make_unique<NESobel5x5VertKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
if(run_sobel_x && run_sobel_y)
{
_tmp_x.allocator()->init(tensor_info);
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
@@ -61,28 +70,29 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void NESobel5x5::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
- NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
+ NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp
index 87ec81f7b0..bc5f87c1ec 100644
--- a/src/runtime/NEON/functions/NESobel7x7.cpp
+++ b/src/runtime/NEON/functions/NESobel7x7.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,8 +29,13 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NESobel7x7::~NESobel7x7() = default;
NESobel7x7::NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
@@ -45,6 +50,9 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
const bool run_sobel_y = output_y != nullptr;
TensorInfo tensor_info(input->info()->tensor_shape(), Format::S32);
+ _sobel_hor = arm_compute::support::cpp14::make_unique<NESobel7x7HorKernel>();
+ _sobel_vert = arm_compute::support::cpp14::make_unique<NESobel7x7VertKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
if(run_sobel_x && run_sobel_y)
{
@@ -52,8 +60,8 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
@@ -61,28 +69,29 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void NESobel7x7::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
- NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
+ NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY);
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
index 4f773861d2..e79ab0ee2d 100644
--- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -24,14 +24,20 @@
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "src/core/helpers/SoftmaxHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
template <bool IS_LOG>
+NESoftmaxLayerGeneric<IS_LOG>::~NESoftmaxLayerGeneric() = default;
+
+template <bool IS_LOG>
NESoftmaxLayerGeneric<IS_LOG>::NESoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_kernel(), _softmax_kernel(), _fill_border_kernel(), _max(), _tmp(), _input_permuted(), _output_permuted(),
_needs_permute(false)
@@ -76,15 +82,17 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f
_memory_group.manage(&_max);
_memory_group.manage(&_tmp);
- // Configure Kernels
- _max_kernel.configure(tmp_input, &_max);
+ // Configure kernels
+ _max_kernel = arm_compute::support::cpp14::make_unique<NELogits1DMaxKernel>();
+ _softmax_kernel = arm_compute::support::cpp14::make_unique<NELogits1DSoftmaxKernel<IS_LOG>>();
+ _max_kernel->configure(tmp_input, &_max);
if(_needs_permute)
{
// Add to the memory manager _output_permuted
_memory_group.manage(&_output_permuted);
// The normalization kernel stores the result in a permuted output tensor
- _softmax_kernel.configure(tmp_input, &_max, &_output_permuted, beta, &_tmp);
+ _softmax_kernel->configure(tmp_input, &_max, &_output_permuted, beta, &_tmp);
_input_permuted.allocator()->allocate();
// Re-permute the permuted output into the requested (4D) output
@@ -96,8 +104,9 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f
else
{
// Softmax 2D case
- _fill_border_kernel.configure(tmp_input, _max_kernel.border_size(), BorderMode::REPLICATE);
- _softmax_kernel.configure(tmp_input, &_max, output, beta, &_tmp);
+ _fill_border_kernel = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _fill_border_kernel->configure(tmp_input, _max_kernel->border_size(), BorderMode::REPLICATE);
+ _softmax_kernel->configure(tmp_input, &_max, output, beta, &_tmp);
}
// Allocate intermediate buffers
@@ -152,10 +161,13 @@ void NESoftmaxLayerGeneric<IS_LOG>::run()
{
_permute_input.run();
}
+ else
+ {
+ NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimY);
+ }
- NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
- NEScheduler::get().schedule(&_max_kernel, Window::DimY);
- NEScheduler::get().schedule(&_softmax_kernel, Window::DimY);
+ NEScheduler::get().schedule(_max_kernel.get(), Window::DimY);
+ NEScheduler::get().schedule(_softmax_kernel.get(), Window::DimY);
if(_needs_permute)
{
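
Because the fill-border kernel is only configured in the 2D (non-permuted) branch of configure(), run() now schedules it only on that path instead of unconditionally. Reconstructed from the hunk above (lines outside the hunk, such as the memory-group scope, are assumed from context), the resulting control flow is approximately:

// Approximate reconstruction of the resulting run(); not a verbatim copy of the file.
template <bool IS_LOG>
void NESoftmaxLayerGeneric<IS_LOG>::run()
{
    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_needs_permute)
    {
        _permute_input.run(); // input was permuted beforehand; no border fill was configured
    }
    else
    {
        // 2D path: the border handler was configured in configure(), so schedule it here only
        NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimY);
    }

    NEScheduler::get().schedule(_max_kernel.get(), Window::DimY);
    NEScheduler::get().schedule(_softmax_kernel.get(), Window::DimY);

    if(_needs_permute)
    {
        _permute_output.run(); // restore the requested output layout
    }
}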
diff --git a/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp b/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp
index 97e793f6fb..516e8d604c 100644
--- a/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp
+++ b/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,9 +29,14 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
+#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NESpaceToBatchLayer::~NESpaceToBatchLayer() = default;
+
NESpaceToBatchLayer::NESpaceToBatchLayer()
: _space_to_batch_kernel(), _memset_kernel(), _has_padding(false)
{
@@ -43,10 +48,12 @@ void NESpaceToBatchLayer::configure(const ITensor *input, const ITensor *block_s
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
- _has_padding = true;
- _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _has_padding = true;
+ _memset_kernel = arm_compute::support::cpp14::make_unique<NEMemsetKernel>();
+ _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(input, block_shape, paddings, output);
+ _space_to_batch_kernel = arm_compute::support::cpp14::make_unique<NESpaceToBatchLayerKernel>();
+ _space_to_batch_kernel->configure(input, block_shape, paddings, output);
}
void NESpaceToBatchLayer::configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output)
@@ -55,10 +62,12 @@ void NESpaceToBatchLayer::configure(const ITensor *input, const int block_shape_
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
- _has_padding = true;
- _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _has_padding = true;
+ _memset_kernel = arm_compute::support::cpp14::make_unique<NEMemsetKernel>();
+ _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+ _space_to_batch_kernel = arm_compute::support::cpp14::make_unique<NESpaceToBatchLayerKernel>();
+ _space_to_batch_kernel->configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
}
Status NESpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
@@ -81,8 +90,8 @@ void NESpaceToBatchLayer::run()
// Zero out output only if we have paddings
if(_has_padding)
{
- NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
+ NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY);
}
- NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY);
+ NEScheduler::get().schedule(_space_to_batch_kernel.get(), Window::DimY);
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp b/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp
index 3e1ec80687..a834600199 100644
--- a/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp
+++ b/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,9 +29,13 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NESpaceToDepthLayer::~NESpaceToDepthLayer() = default;
+
NESpaceToDepthLayer::NESpaceToDepthLayer()
: _space_to_depth_kernel()
{
@@ -40,7 +44,8 @@ NESpaceToDepthLayer::NESpaceToDepthLayer()
void NESpaceToDepthLayer::configure(const ITensor *input, ITensor *output, int32_t block_shape)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- _space_to_depth_kernel.configure(input, output, block_shape);
+ _space_to_depth_kernel = arm_compute::support::cpp14::make_unique<NESpaceToDepthLayerKernel>();
+ _space_to_depth_kernel->configure(input, output, block_shape);
}
Status NESpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
@@ -51,6 +56,6 @@ Status NESpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo
void NESpaceToDepthLayer::run()
{
- NEScheduler::get().schedule(&_space_to_depth_kernel, Window::DimY);
+ NEScheduler::get().schedule(_space_to_depth_kernel.get(), Window::DimY);
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEStackLayer.cpp b/src/runtime/NEON/functions/NEStackLayer.cpp
index a99a95ab2a..e38ff6bee7 100644
--- a/src/runtime/NEON/functions/NEStackLayer.cpp
+++ b/src/runtime/NEON/functions/NEStackLayer.cpp
@@ -30,9 +30,13 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEStackLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEStackLayer::~NEStackLayer() = default;
+
NEStackLayer::NEStackLayer() // NOLINT
: _input(),
_stack_kernels(),
@@ -50,7 +54,8 @@ void NEStackLayer::configure(const std::vector<ITensor *> &input, int axis, ITen
for(unsigned int i = 0; i < _num_inputs; i++)
{
- _stack_kernels[i].configure(input[i], axis_u, i, _num_inputs, output);
+ _stack_kernels[i] = arm_compute::support::cpp14::make_unique<NEStackLayerKernel>();
+ _stack_kernels[i]->configure(input[i], axis_u, i, _num_inputs, output);
}
}
@@ -80,7 +85,7 @@ void NEStackLayer::run()
{
for(unsigned i = 0; i < _num_inputs; i++)
{
- NEScheduler::get().schedule(&_stack_kernels[i], Window::DimY);
+ NEScheduler::get().schedule(_stack_kernels[i].get(), Window::DimY);
}
}
} // namespace arm_compute
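
NEStackLayer keeps one kernel per input, so _stack_kernels is presumably now a std::vector<std::unique_ptr<NEStackLayerKernel>>; because the loop assigns by index, the vector has to be sized to _num_inputs before the loop, which happens above the hunk. A sketch of the presumed surrounding context:

// Presumed context around the loop shown in the hunk; the sizing of the vector is outside
// the hunk, so its exact form is an assumption.
_num_inputs = input.size();
_stack_kernels.resize(_num_inputs); // std::vector<std::unique_ptr<NEStackLayerKernel>>

for(unsigned int i = 0; i < _num_inputs; i++)
{
    _stack_kernels[i] = arm_compute::support::cpp14::make_unique<NEStackLayerKernel>();
    _stack_kernels[i]->configure(input[i], axis_u, i, _num_inputs, output);
}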
diff --git a/src/runtime/NEON/functions/NEStridedSlice.cpp b/src/runtime/NEON/functions/NEStridedSlice.cpp
index 8bf81e8270..308b856ec6 100644
--- a/src/runtime/NEON/functions/NEStridedSlice.cpp
+++ b/src/runtime/NEON/functions/NEStridedSlice.cpp
@@ -24,8 +24,8 @@
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEStridedSliceKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NETableLookup.cpp b/src/runtime/NEON/functions/NETableLookup.cpp
index b8d765f76b..9295bf0ece 100644
--- a/src/runtime/NEON/functions/NETableLookup.cpp
+++ b/src/runtime/NEON/functions/NETableLookup.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NETableLookup.h"
-#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
+#include "src/core/NEON/kernels/NETableLookupKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEThreshold.cpp b/src/runtime/NEON/functions/NEThreshold.cpp
index e21511ed65..2f1e3047b5 100644
--- a/src/runtime/NEON/functions/NEThreshold.cpp
+++ b/src/runtime/NEON/functions/NEThreshold.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEThreshold.h"
-#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h"
+#include "src/core/NEON/kernels/NEThresholdKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NETile.cpp b/src/runtime/NEON/functions/NETile.cpp
index 6fda3a5ba6..6a1e20ddf8 100644
--- a/src/runtime/NEON/functions/NETile.cpp
+++ b/src/runtime/NEON/functions/NETile.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NETile.h"
-#include "arm_compute/core/NEON/kernels/NETileKernel.h"
+#include "src/core/NEON/kernels/NETileKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp
index 88d1672173..5af417f4ed 100644
--- a/src/runtime/NEON/functions/NETranspose.cpp
+++ b/src/runtime/NEON/functions/NETranspose.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "support/MemorySupport.h"
#include <utility>
diff --git a/src/runtime/NEON/functions/NEUpsampleLayer.cpp b/src/runtime/NEON/functions/NEUpsampleLayer.cpp
index 58c050f904..aae58387e2 100644
--- a/src/runtime/NEON/functions/NEUpsampleLayer.cpp
+++ b/src/runtime/NEON/functions/NEUpsampleLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,13 @@
*/
#include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h"
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
+#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEUpsampleLayer::~NEUpsampleLayer() = default;
+
NEUpsampleLayer::NEUpsampleLayer()
: _kernel(), _data_layout()
{
@@ -41,12 +44,13 @@ Status NEUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *ou
void NEUpsampleLayer::configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy &policy)
{
_data_layout = input->info()->data_layout();
- _kernel.configure(input, output, info, policy);
+ _kernel = arm_compute::support::cpp14::make_unique<NEUpsampleLayerKernel>();
+ _kernel->configure(input, output, info, policy);
}
void NEUpsampleLayer::run()
{
const auto win = (_data_layout == DataLayout::NCHW) ? Window::DimZ : Window::DimX;
- NEScheduler::get().schedule(&_kernel, win);
+ NEScheduler::get().schedule(_kernel.get(), win);
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEWarpAffine.cpp b/src/runtime/NEON/functions/NEWarpAffine.cpp
index ec2c6883ba..b5dbfe0d5c 100644
--- a/src/runtime/NEON/functions/NEWarpAffine.cpp
+++ b/src/runtime/NEON/functions/NEWarpAffine.cpp
@@ -24,8 +24,9 @@
#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEWarpKernel.h"
#include "support/MemorySupport.h"
#include <utility>
@@ -58,5 +59,7 @@ void NEWarpAffine::configure(ITensor *input, ITensor *output, const std::array<f
ARM_COMPUTE_ERROR("Interpolation type not supported");
}
- _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ _border_handler = std::move(b);
}
diff --git a/src/runtime/NEON/functions/NEWarpPerspective.cpp b/src/runtime/NEON/functions/NEWarpPerspective.cpp
index bf361b8ab9..8d42121005 100644
--- a/src/runtime/NEON/functions/NEWarpPerspective.cpp
+++ b/src/runtime/NEON/functions/NEWarpPerspective.cpp
@@ -24,14 +24,15 @@
#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEWarpKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
@@ -58,5 +59,8 @@ void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::ar
ARM_COMPUTE_ERROR("Interpolation type not supported");
}
- _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index 23b9f60c38..1cb2458e13 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -30,6 +30,10 @@
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
#include "support/MemorySupport.h"
diff --git a/src/runtime/NEON/functions/NEYOLOLayer.cpp b/src/runtime/NEON/functions/NEYOLOLayer.cpp
index 233afb727a..5cad53bffd 100644
--- a/src/runtime/NEON/functions/NEYOLOLayer.cpp
+++ b/src/runtime/NEON/functions/NEYOLOLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h"
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
+#include "src/core/NEON/kernels/NEYOLOLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute