Diffstat (limited to 'arm_compute/runtime/NEON')
-rw-r--r--  arm_compute/runtime/NEON/INEOperator.h  76
-rw-r--r--  arm_compute/runtime/NEON/INESimpleFunction.h  25
-rw-r--r--  arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h  9
-rw-r--r--  arm_compute/runtime/NEON/NEFunctions.h  80
-rw-r--r--  arm_compute/runtime/NEON/NEScheduler.h  6
-rw-r--r--  arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h  50
-rw-r--r--  arm_compute/runtime/NEON/functions/NEAccumulate.h  74
-rw-r--r--  arm_compute/runtime/NEON/functions/NEActivationLayer.h  39
-rw-r--r--  arm_compute/runtime/NEON/functions/NEAddMulAdd.h  115
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h  34
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArithmeticAddition.h  73
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h  75
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h  52
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h  47
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseAnd.h  22
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseNot.h  10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseOr.h  10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseXor.h  10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h  34
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBox3x3.h  58
-rw-r--r--  arm_compute/runtime/NEON/functions/NECannyEdge.h  99
-rw-r--r--  arm_compute/runtime/NEON/functions/NECast.h  60
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelCombine.h  58
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelExtract.h  56
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h  13
-rw-r--r--  arm_compute/runtime/NEON/functions/NECol2Im.h  60
-rw-r--r--  arm_compute/runtime/NEON/functions/NEColorConvert.h  67
-rw-r--r--  arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h  62
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConcatenateLayer.h  60
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConv3D.h  100
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h  80
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolution.h  132
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolutionLayer.h  112
-rw-r--r--  arm_compute/runtime/NEON/functions/NECopy.h  38
-rw-r--r--  arm_compute/runtime/NEON/functions/NECropResize.h  33
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h  89
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h  59
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h  43
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h  205
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDequantizationLayer.h  47
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDerivative.h  70
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h  58
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDilate.h  55
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h  63
-rw-r--r--  arm_compute/runtime/NEON/functions/NEElementwiseOperations.h  322
-rw-r--r--  arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h  169
-rw-r--r--  arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h  77
-rw-r--r--  arm_compute/runtime/NEON/functions/NEErode.h  55
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT1D.h  51
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT2D.h  25
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h  81
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFastCorners.h  84
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFill.h  38
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFillBorder.h  22
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFlattenLayer.h  35
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFloor.h  41
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h  178
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h  45
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMM.h  109
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h  106
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConv2d.h  121
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h  363
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h  49
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h  69
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h  125
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h  283
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h  58
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGather.h  22
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGaussian3x3.h  55
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGaussian5x5.h  75
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGaussianPyramid.h  122
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h  72
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGDescriptor.h  76
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGDetector.h  57
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGGradient.h  75
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h  109
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHarrisCorners.h  107
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHistogram.h  63
-rw-r--r--  arm_compute/runtime/NEON/functions/NEIm2Col.h  85
-rw-r--r--  arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h  46
-rw-r--r--  arm_compute/runtime/NEON/functions/NEIntegralImage.h  45
-rw-r--r--  arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h  38
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayer.h  254
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h  117
-rw-r--r--  arm_compute/runtime/NEON/functions/NELaplacianPyramid.h  85
-rw-r--r--  arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h  91
-rw-r--r--  arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h  108
-rw-r--r--  arm_compute/runtime/NEON/functions/NELogical.h  186
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMagnitude.h  47
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMatMul.h  145
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h  104
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMeanStdDev.h  64
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h  25
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMedian3x3.h  56
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMinMaxLocation.h  71
-rw-r--r--  arm_compute/runtime/NEON/functions/NENonLinearFilter.h  61
-rw-r--r--  arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h  56
-rw-r--r--  arm_compute/runtime/NEON/functions/NENormalizationLayer.h  48
-rw-r--r--  arm_compute/runtime/NEON/functions/NEOpticalFlow.h  102
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPReluLayer.h  41
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPadLayer.h  72
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPermute.h  42
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPhase.h  47
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h  117
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPooling3dLayer.h  96
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPoolingLayer.h  61
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h  18
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQLSTMLayer.h  441
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQuantizationLayer.h  50
-rw-r--r--  arm_compute/runtime/NEON/functions/NERNNLayer.h  64
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIAlignLayer.h  39
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h  60
-rw-r--r--  arm_compute/runtime/NEON/functions/NERange.h  35
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReduceMean.h  30
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReductionOperation.h  70
-rw-r--r--  arm_compute/runtime/NEON/functions/NERemap.h  60
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReorderLayer.h  94
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReorgLayer.h  12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReshapeLayer.h  41
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReverse.h  47
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScale.h  82
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScharr3x3.h  59
-rw-r--r--  arm_compute/runtime/NEON/functions/NESelect.h  15
-rw-r--r--  arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h  56
-rw-r--r--  arm_compute/runtime/NEON/functions/NESlice.h  77
-rw-r--r--  arm_compute/runtime/NEON/functions/NESobel3x3.h  59
-rw-r--r--  arm_compute/runtime/NEON/functions/NESobel5x5.h  79
-rw-r--r--  arm_compute/runtime/NEON/functions/NESobel7x7.h  79
-rw-r--r--  arm_compute/runtime/NEON/functions/NESoftmaxLayer.h  91
-rw-r--r--  arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h  66
-rw-r--r--  arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h  27
-rw-r--r--  arm_compute/runtime/NEON/functions/NESplit.h  15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStackLayer.h  35
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStridedSlice.h  111
-rw-r--r--  arm_compute/runtime/NEON/functions/NETableLookup.h  47
-rw-r--r--  arm_compute/runtime/NEON/functions/NEThreshold.h  54
-rw-r--r--  arm_compute/runtime/NEON/functions/NETile.h  14
-rw-r--r--  arm_compute/runtime/NEON/functions/NETranspose.h  44
-rw-r--r--  arm_compute/runtime/NEON/functions/NEUnstack.h  29
-rw-r--r--  arm_compute/runtime/NEON/functions/NEUpsampleLayer.h  73
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWarpAffine.h  53
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWarpPerspective.h  52
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h  114
-rw-r--r--  arm_compute/runtime/NEON/functions/NEYOLOLayer.h  64
-rw-r--r--  arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h  123
145 files changed, 4718 insertions, 6128 deletions
diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h
new file mode 100644
index 0000000000..7971168d24
--- /dev/null
+++ b/arm_compute/runtime/NEON/INEOperator.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_INEOPERATOR_H
+#define ARM_COMPUTE_INEOPERATOR_H
+
+#include "arm_compute/runtime/IOperator.h"
+#include "arm_compute/runtime/IRuntimeContext.h"
+#include "arm_compute/runtime/Types.h"
+
+#include "../../core/ITensor.h"
+#include <memory>
+
+namespace arm_compute
+{
+class ICPPKernel;
+class Window;
+
+using INEKernel = ICPPKernel;
+namespace experimental
+{
+/** Basic interface for functions which have a single async CPU kernel */
+class INEOperator : public IOperator
+{
+public:
+ /** Constructor
+ *
+ * @param[in] ctx Runtime context to be used by the function
+ */
+ INEOperator(IRuntimeContext *ctx = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEOperator(const INEOperator &) = delete;
+ /** Default move constructor */
+ INEOperator(INEOperator &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEOperator &operator=(const INEOperator &) = delete;
+ /** Default move assignment operator */
+ INEOperator &operator=(INEOperator &&) = default;
+ /** Default destructor */
+ ~INEOperator();
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &constants) override;
+ MemoryRequirements workspace() const override;
+
+protected:
+ void run(ITensorPack &tensors, const Window &window);
+
+ std::unique_ptr<INEKernel> _kernel;
+ IRuntimeContext *_ctx;
+ MemoryRequirements _workspace;
+};
+} // namespace experimental
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_INEOPERATOR_H */
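A minimal driving sketch for the new experimental operator interface (not part of the diff): unlike IFunction, tensors are bound at run time through an ITensorPack rather than captured at configure time. The helper name is illustrative; ACL_SRC/ACL_DST are the generic slot ids from the library's experimental Types header.

    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/runtime/NEON/INEOperator.h"

    using namespace arm_compute;

    // 'op' stands in for any concrete operator derived from
    // experimental::INEOperator that has already been configured.
    void run_single_kernel_op(experimental::INEOperator &op, ITensor &src, ITensor &dst)
    {
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, &src); // input slot
        pack.add_tensor(TensorType::ACL_DST, &dst); // output slot
        op.run(pack);                               // schedules the single CPU kernel
    }
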
diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h
index 8506797fe3..f783a836ee 100644
--- a/arm_compute/runtime/NEON/INESimpleFunction.h
+++ b/arm_compute/runtime/NEON/INESimpleFunction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,27 +24,38 @@
#ifndef ARM_COMPUTE_INESIMPLEFUNCTION_H
#define ARM_COMPUTE_INESIMPLEFUNCTION_H
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include <memory>
namespace arm_compute
{
-/** Basic interface for functions which have a single NEON kernel */
+class ICPPKernel;
+class NEFillBorderKernel;
+using INEKernel = ICPPKernel;
+/** Basic interface for functions which have a single CPU kernel */
class INESimpleFunction : public IFunction
{
public:
/** Constructor */
INESimpleFunction();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INESimpleFunction(const INESimpleFunction &) = delete;
+ /** Default move constructor */
+ INESimpleFunction(INESimpleFunction &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INESimpleFunction &operator=(const INESimpleFunction &) = delete;
+ /** Default move assignment operator */
+ INESimpleFunction &operator=(INESimpleFunction &&) = default;
+ /** Default destructor */
+ ~INESimpleFunction();
// Inherited methods overridden:
void run() override final;
protected:
- std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */
- NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */
+ std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_INESIMPLEFUNCTION_H */
diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h
index 223048f40f..dc4bac17e4 100644
--- a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h
+++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H
#define ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IRuntimeContext.h"
@@ -32,7 +31,9 @@
namespace arm_compute
{
-/** Basic interface for functions which have a single NEON kernel and no border */
+class ICPPKernel;
+using INEKernel = ICPPKernel;
+/** Basic interface for functions which have a single CPU kernel and no border */
class INESimpleFunctionNoBorder : public IFunction
{
public:
@@ -49,6 +50,8 @@ public:
INESimpleFunctionNoBorder &operator=(const INESimpleFunctionNoBorder &) = delete;
/** Default move assignment operator */
INESimpleFunctionNoBorder &operator=(INESimpleFunctionNoBorder &&) = default;
+ /** Default destructor */
+ ~INESimpleFunctionNoBorder();
// Inherited methods overridden:
void run() override final;
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index de364fa9af..cc4d303202 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,13 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEFUNCTIONS_H
-#define ARM_COMPUTE_NEFUNCTIONS_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_NEFUNCTIONS_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_NEFUNCTIONS_H
-/* Header regrouping all the NEON functions */
-#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
-#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h"
#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
@@ -38,18 +36,11 @@
#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h"
#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h"
#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
-#include "arm_compute/runtime/NEON/functions/NEBox3x3.h"
-#include "arm_compute/runtime/NEON/functions/NECannyEdge.h"
#include "arm_compute/runtime/NEON/functions/NECast.h"
-#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h"
-#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h"
#include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
-#include "arm_compute/runtime/NEON/functions/NECol2Im.h"
-#include "arm_compute/runtime/NEON/functions/NEColorConvert.h"
-#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEConv3D.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
-#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NECropResize.h"
@@ -58,103 +49,66 @@
#include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
#include "arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDilate.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h"
-#include "arm_compute/runtime/NEON/functions/NEErode.h"
#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
#include "arm_compute/runtime/NEON/functions/NEFFT2D.h"
#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEFastCorners.h"
#include "arm_compute/runtime/NEON/functions/NEFill.h"
#include "arm_compute/runtime/NEON/functions/NEFillBorder.h"
#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFloor.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h"
+#include "arm_compute/runtime/NEON/functions/NEGather.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h"
-#include "arm_compute/runtime/NEON/functions/NEGather.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
#include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h"
-#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
-#include "arm_compute/runtime/NEON/functions/NEHistogram.h"
-#include "arm_compute/runtime/NEON/functions/NEIm2Col.h"
#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h"
#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
+#include "arm_compute/runtime/NEON/functions/NELogical.h"
#include "arm_compute/runtime/NEON/functions/NELSTMLayer.h"
#include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h"
-#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h"
-#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h"
-#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEMagnitude.h"
-#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h"
+#include "arm_compute/runtime/NEON/functions/NEMatMul.h"
+#include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h"
-#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h"
-#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h"
-#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h"
-#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/runtime/NEON/functions/NEPhase.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h"
#include "arm_compute/runtime/NEON/functions/NEQLSTMLayer.h"
#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NERNNLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NERange.h"
#include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
-#include "arm_compute/runtime/NEON/functions/NERemap.h"
+#include "arm_compute/runtime/NEON/functions/NEReorderLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReorgLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReverse.h"
+#include "arm_compute/runtime/NEON/functions/NERNNLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NEScale.h"
-#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
#include "arm_compute/runtime/NEON/functions/NESelect.h"
-#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h"
#include "arm_compute/runtime/NEON/functions/NESlice.h"
-#include "arm_compute/runtime/NEON/functions/NESobel3x3.h"
-#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
-#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h"
#include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h"
#include "arm_compute/runtime/NEON/functions/NESplit.h"
#include "arm_compute/runtime/NEON/functions/NEStackLayer.h"
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
-#include "arm_compute/runtime/NEON/functions/NETableLookup.h"
-#include "arm_compute/runtime/NEON/functions/NEThreshold.h"
#include "arm_compute/runtime/NEON/functions/NETile.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/NEON/functions/NEUnstack.h"
-#include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h"
-#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h"
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h"
-#endif /* ARM_COMPUTE_NEFUNCTIONS_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_NEFUNCTIONS_H
diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h
index 54a92bbb41..613f44cc52 100644
--- a/arm_compute/runtime/NEON/NEScheduler.h
+++ b/arm_compute/runtime/NEON/NEScheduler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,7 +28,7 @@
namespace arm_compute
{
-/** NEON Scheduler */
+/** CPU Scheduler */
using NEScheduler = Scheduler;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_NESCHEDULER_H */
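Since NEScheduler is just an alias of the generic Scheduler, thread control goes through the usual IScheduler singleton. A small sketch (assuming only the long-standing Scheduler::get()/set_num_threads() entry points):

    #include "arm_compute/runtime/NEON/NEScheduler.h"

    using namespace arm_compute;

    void set_cpu_threads(unsigned int n)
    {
        // NEScheduler::get() returns the process-wide IScheduler used by all
        // CPU functions; clamp to at least one worker thread.
        NEScheduler::get().set_num_threads(n == 0 ? 1 : n);
    }
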
diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h
deleted file mode 100644
index 2d6f94cde0..0000000000
--- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H
-#define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEAbsoluteDifferenceKernel
- *
- * @note The image data type for the inputs must be U8 or S16
- * @note The function calculates the absolute difference also when the 2 inputs have different image data types
- */
-class NEAbsoluteDifference : public INESimpleFunction
-{
-public:
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-};
-}
-#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h
deleted file mode 100644
index 0426bf94d7..0000000000
--- a/arm_compute/runtime/NEON/functions/NEAccumulate.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEACCUMULATE_H
-#define ARM_COMPUTE_NEACCUMULATE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEAccumulateKernel */
-class NEAccumulate : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and accumulation tensors
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, ITensor *output);
-};
-
-/** Basic function to run @ref NEAccumulateWeightedKernel */
-class NEAccumulateWeighted : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and accumulation tensors, and the scale value
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] alpha The input scalar value with a value input the range of [0, 1.0]
- * @param[in,out] output Accumulated tensor. Data type supported: U8.
- * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
- */
- void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false);
-};
-
-/** Basic function to run @ref NEAccumulateSquaredKernel */
-class NEAccumulateSquared : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] shift The input with a value input the range of [0, 15]
- * @param[in,out] output Accumulated tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, uint32_t shift, ITensor *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACCUMULATE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 95901dc2d8..5584fdc783 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,20 +24,24 @@
#ifndef ARM_COMPUTE_NEACTIVATIONLAYER_H
#define ARM_COMPUTE_NEACTIVATIONLAYER_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IRuntimeContext.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEActivationLayerKernel
+/** Basic function to run @ref cpu::kernels::CpuActivationKernel
*
* @note The function simulates an activation layer with the specified activation function.
*/
-class NEActivationLayer : public INESimpleFunctionNoBorder
+class NEActivationLayer : public IFunction
{
public:
/** Constructor
@@ -48,14 +52,28 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEActivationLayer(const NEActivationLayer &) = delete;
/** Default move constructor */
- NEActivationLayer(NEActivationLayer &&) = default;
+ NEActivationLayer(NEActivationLayer &&);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEActivationLayer &operator=(const NEActivationLayer &) = delete;
/** Default move assignment operator */
- NEActivationLayer &operator=(NEActivationLayer &&) = default;
+ NEActivationLayer &operator=(NEActivationLayer &&);
+ /** Destructor */
+ ~NEActivationLayer();
/** [NEActivationLayer snippet] **/
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
@@ -75,6 +93,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */
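With the move from INESimpleFunctionNoBorder to an Impl-backed IFunction, public usage is unchanged. A usage sketch (shapes and the RELU choice are illustrative):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void relu_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));

        NEActivationLayer act;
        // Passing nullptr as the output would run the activation in-place.
        act.configure(&src, &dst,
                      ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        act.run(); // dispatches cpu::kernels::CpuActivationKernel
    }
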
diff --git a/arm_compute/runtime/NEON/functions/NEAddMulAdd.h b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h
new file mode 100644
index 0000000000..6c65c055dd
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
+#define ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+class ActivationLayerInfo;
+
+/** Function to compute Add+Mul+Add fused operation */
+class NEAddMulAdd : public IFunction
+{
+public:
+ /** Constructor */
+ NEAddMulAdd(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAddMulAdd(const NEAddMulAdd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAddMulAdd(NEAddMulAdd &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAddMulAdd &operator=(const NEAddMulAdd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAddMulAdd &operator=(NEAddMulAdd &&) = delete;
+ /** Destructor */
+ ~NEAddMulAdd();
+ /** Initialize the function's inputs and outputs.
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * Valid data type configurations:
+ * |input1 |input2 |bn_mul |bn_add |add_output |final_output |
+ * |:--------------|:--------------|:--------------|:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |F32 |F32 |
+ *
+ * This is what this composite function (tailored for add followed by a batch norm operation) does:
+ * add_output <- input1 + input2 (add)
+ * final_output <- add_output * bn_mul + bn_add (batch norm = mul+add)
+ *
+ * @param[in] input1 First tensor input. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[in] bn_mul The multiplication coefficient on the feature dimension. Data types supported: Same as @p input1.
+ * It's a one-dimensional tensor with size equal to the feature maps [FM]
+ * @param[in] bn_add The addition coefficient on the feature dimension. Data types supported: Same as @p input1.
+ * It's a one-dimensional tensor with size equal to the feature maps [FM]
+ * @param[out] add_output Output of the first add. Data type supported: Same as @p input1.
+ * @param[out] final_output Output of the add+mul+add+act composite operation. Data type supported: Same as @p input1.
+ * @param[in] policy Policy to handle overflow
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ */
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *bn_mul,
+ ITensor *bn_add,
+ ITensor *add_output,
+ ITensor *final_output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEAddMulAdd
+ *
+ * Similar to @ref NEAddMulAdd::configure() except the arguments are @ref ITensorInfo * instead of @ref ITensor *
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *bn_mul,
+ const ITensorInfo *bn_add,
+ const ITensorInfo *add_output,
+ const ITensorInfo *final_output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD */
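A configuration sketch for the documented add + batch-norm fusion (not part of the diff; shapes and the feature-map count are illustrative, and note that act_info has no default argument here):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/function_info/ActivationLayerInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void add_mul_add_example()
    {
        const TensorShape shape(32U /* FM */, 16U, 4U);
        Tensor in1, in2, add_out, final_out;
        for (Tensor *t : {&in1, &in2, &add_out, &final_out})
            t->allocator()->init(TensorInfo(shape, 1, DataType::F32));

        // bn_mul / bn_add are 1-D tensors sized to the feature maps (FM = 32).
        Tensor bn_mul, bn_add;
        bn_mul.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        bn_add.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

        NEAddMulAdd fused;
        fused.configure(&in1, &in2, &bn_mul, &bn_add, &add_out, &final_out,
                        ConvertPolicy::SATURATE, ActivationLayerInfo{}); // no fused activation

        for (Tensor *t : {&in1, &in2, &bn_mul, &bn_add, &add_out, &final_out})
            t->allocator()->allocate();
        fused.run(); // add_out <- in1 + in2; final_out <- add_out * bn_mul + bn_add
    }
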
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index c50f358d1f..3bb50a0f90 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_NEARGMINMAXLAYER_H
#define ARM_COMPUTE_NEARGMINMAXLAYER_H
-#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
@@ -33,11 +31,10 @@
namespace arm_compute
{
class ITensor;
-
/** Function to calculate the index of the minimum or maximum values in a
* tensor based on an axis.
*
- * This function calls the following NEON kernels:
+ * This function calls the following kernels:
*
* -# @ref NEReductionOperationKernel
* -# @ref NEFillBorderKernel
@@ -52,8 +49,30 @@ class NEArgMinMaxLayer : public IFunction
public:
/** Constructor */
NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArgMinMaxLayer(const NEArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArgMinMaxLayer &operator=(const NEArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEArgMinMaxLayer(NEArgMinMaxLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEArgMinMaxLayer &operator=(NEArgMinMaxLayer &&) = delete;
+ /** Default destructor */
+ ~NEArgMinMaxLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-------------|
+ * |QASYMM8 |U32, S32 |
+ * |QASYMM8_SIGNED |U32, S32 |
+ * |S32 |U32, S32, S64 |
+ * |F16 |U32, S32 |
+ * |F32 |U32, S32 |
+ *
* @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
* @param[out] output Output source tensor. Data types supported: U32/S32.
@@ -64,7 +83,7 @@ public:
*
* @param[in] input Input source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
- * @param[in] output Output source tensor info. Data types supported: U32/S32.
+ * @param[in] output Output source tensor info. Data types supported: U32/S32/S64.
* @param[in] op Operation to perform: min or max
*
* @return a status
@@ -75,7 +94,8 @@ public:
void run() override;
private:
- std::unique_ptr<NEReductionOperation> _reduction_function;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEARGMINMAXLAYER_H */
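A usage sketch for the Impl-based layer (illustrative shapes; it assumes the reduced axis is dropped from the output shape, with indices returned as U32):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void argmax_example()
    {
        Tensor src, idx;
        src.allocator()->init(TensorInfo(TensorShape(10U, 4U), 1, DataType::F32));
        idx.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::U32));

        NEArgMinMaxLayer argmax; // default memory manager
        argmax.configure(&src, 0 /* axis */, &idx, ReductionOperation::ARG_IDX_MAX);

        src.allocator()->allocate();
        idx.allocator()->allocate();
        argmax.run();
    }
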
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 6cab5b3547..73a43dbc44 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,36 +25,83 @@
#define ARM_COMPUTE_NEARITHMETICADDITION_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEArithmeticAdditionKernel */
-class NEArithmeticAddition : public INESimpleFunction
+/** Basic function to run @ref cpu::kernels::CpuAddKernel */
+class NEArithmeticAddition : public IFunction
{
public:
+ /** Default Constructor */
+ NEArithmeticAddition();
+ /** Default Destructor */
+ ~NEArithmeticAddition();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAddition(const NEArithmeticAddition &) = delete;
+ /** Default move constructor */
+ NEArithmeticAddition(NEArithmeticAddition &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete;
+ /** Default move assignment operator */
+ NEArithmeticAddition &operator=(NEArithmeticAddition &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
*
- * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] output Output tensor. Data types supported: U8/SQASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[in] policy Policy to use to handle overflow
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEARITHMETICADDITION_H */
+#endif /* ARM_COMPUTE_NEARITHMETICADDITION_H */
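The call pattern is unchanged by the port to CpuAddKernel; only the input pointers became const. A short sketch (S16 chosen arbitrarily from the supported configurations):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void add_example()
    {
        Tensor a, b, sum;
        for (Tensor *t : {&a, &b, &sum})
            t->allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::S16));

        NEArithmeticAddition add;
        // SATURATE clamps on overflow; WRAP would wrap around instead.
        add.configure(&a, &b, &sum, ConvertPolicy::SATURATE);

        for (Tensor *t : {&a, &b, &sum})
            t->allocator()->allocate();
        add.run();
    }
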
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index 4774fb6adb..3e4f6356c5 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,43 +25,88 @@
#define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
namespace arm_compute
{
class ITensor;
-/** Basic function to run @ref NEArithmeticSubtractionKernel
+/** Basic function to run @ref cpu::kernels::CpuSubKernel
*
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
* @note The function performs an arithmetic subtraction between two tensors.
*
* This function calls the following kernels:
- * -# @ref NEArithmeticSubtractionKernel
+ * -# @ref cpu::kernels::CpuSubKernel
*/
-class NEArithmeticSubtraction : public INESimpleFunction
+class NEArithmeticSubtraction : public IFunction
{
public:
+ /** Default Constructor */
+ NEArithmeticSubtraction();
+ /** Default Destructor */
+ ~NEArithmeticSubtraction();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticSubtraction(const NEArithmeticSubtraction &) = delete;
+ /** Default move constructor */
+ NEArithmeticSubtraction(NEArithmeticSubtraction &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticSubtraction &operator=(const NEArithmeticSubtraction &) = delete;
+ /** Default move assignment operator */
+ NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
* @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
*
- * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
+ * @param[in] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
* @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTION_H */
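The static validate() mirrors configure() on ITensorInfo only, so unsupported setups (e.g. WRAP on quantized inputs) can be rejected before any state is built. A sketch of that check-then-configure pattern, assuming the usual Status accessors:

    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    bool try_configure_sub(NEArithmeticSubtraction &sub, Tensor &a, Tensor &b, Tensor &out)
    {
        const Status st = NEArithmeticSubtraction::validate(a.info(), b.info(), out.info(),
                                                            ConvertPolicy::SATURATE);
        if (st.error_code() != ErrorCode::OK)
            return false; // st.error_description() explains the rejection
        sub.configure(&a, &b, &out, ConvertPolicy::SATURATE);
        return true;
    }
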
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index 14416e7323..99e2dcadbb 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,16 @@
#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H
#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H
-#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include <memory>
+
namespace arm_compute
{
class ITensor;
+class NEBatchNormalizationLayerKernel;
/** Basic function to run @ref NENormalizationLayerKernel and simulate a batch normalization layer.
*
@@ -42,10 +44,30 @@ class ITensor;
class NEBatchNormalizationLayer : public IFunction
{
public:
- /** Default constructor */
+ /** Constructor */
NEBatchNormalizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchNormalizationLayer(const NEBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchNormalizationLayer &operator=(const NEBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchNormalizationLayer(NEBatchNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchNormalizationLayer &operator=(NEBatchNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NEBatchNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
@@ -59,7 +81,13 @@ public:
* @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
*/
- void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f,
+ void configure(ITensor *input,
+ ITensor *output,
+ const ITensor *mean,
+ const ITensor *var,
+ const ITensor *beta = nullptr,
+ const ITensor *gamma = nullptr,
+ float epsilon = 0.001f,
ActivationLayerInfo act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayer
*
@@ -76,16 +104,20 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *mean,
+ const ITensorInfo *var,
+ const ITensorInfo *beta = nullptr,
+ const ITensorInfo *gamma = nullptr,
+ float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
private:
- NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */
+ std::unique_ptr<NEBatchNormalizationLayerKernel> _norm_kernel; /**< Batch normalization layer kernel */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */
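
A minimal usage sketch of the reworked interface follows; the tensor shapes and the fused RELU are hypothetical choices for illustration, not part of the patch:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void batch_norm_example()
    {
        // Hypothetical NCHW shapes: 8x8 feature maps, 4 channels.
        Tensor src, dst, mean, var;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::F32));
        mean.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));
        var.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));

        NEBatchNormalizationLayer bn;
        // beta and gamma keep their nullptr defaults; a RELU is fused via act_info.
        bn.configure(&src, &dst, &mean, &var, nullptr, nullptr, 0.001f,
                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        mean.allocator()->allocate();
        var.allocator()->allocate();
        bn.run();
    }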
diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
index 2a62530246..ebed0bea29 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,26 +24,49 @@
#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYER_H
#define ARM_COMPUTE_NEBATCHTOSPACELAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEBatchToSpaceLayerKernel. */
class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEBatchToSpaceLayer() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchToSpaceLayer(const NEBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchToSpaceLayer &operator=(const NEBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchToSpaceLayer(NEBatchToSpaceLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchToSpaceLayer &operator=(NEBatchToSpaceLayer &&) = delete;
+ /** Default destructor */
+ ~NEBatchToSpaceLayer() = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:---------|:---------|:----------|
+ * |All       |S32       |All        |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
* @param[out] output Tensor output. Data types supported: same as @p input
+ *
+ * @deprecated This method for dynamic block shape is not fully mature and will be removed in the 23.08 release
*/
+ ARM_COMPUTE_DEPRECATED_REL(23.05)
void configure(const ITensor *input, const ITensor *block_shape, ITensor *output);
/** Set the input and output tensors. (Static block shape).
*
@@ -51,8 +74,13 @@ public:
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
* @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*/
- void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output);
+ void configure(const ITensor *input,
+ int32_t block_shape_x,
+ int32_t block_shape_y,
+ ITensor *output,
+ const CropInfo &crop_info = CropInfo{});
/** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayer
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
@@ -60,7 +88,9 @@ public:
* @param[out] output Tensor output info. Data types supported: same as @p input
*
* @return a status
+ * @deprecated This method for dynamic block shape is not fully mature and will be removed in the 23.08 release
*/
+ ARM_COMPUTE_DEPRECATED_REL(23.05)
static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayer (Static block shape).
*
@@ -68,10 +98,15 @@ public:
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
* @param[out] output Tensor output info. Data types supported: same as @p input
+ * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, int32_t block_shape_x, int32_t block_shape_y, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ int32_t block_shape_x,
+ int32_t block_shape_y,
+ const ITensorInfo *output,
+ const CropInfo &crop_info = CropInfo{});
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYER_H */
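
Since the dynamic block shape overloads are deprecated, here is a sketch of the static block shape path with the new crop_info parameter; the shapes are illustrative only:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void batch_to_space_example()
    {
        // Four 2x2 single-channel batches fold into one 4x4 output (block shape 2x2, NCHW).
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(2U, 2U, 1U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32));

        NEBatchToSpaceLayer b2s;
        b2s.configure(&src, 2, 2, &dst, CropInfo{}); // default CropInfo leaves the output uncropped

        src.allocator()->allocate();
        dst.allocator()->allocate();
        b2s.run();
    }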
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
index c254c30ce5..1f95f193d3 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,28 @@ class ITensor;
class NEBitwiseAnd : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEBitwiseAnd() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseAnd(const NEBitwiseAnd &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseAnd &operator=(const NEBitwiseAnd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBitwiseAnd(NEBitwiseAnd &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBitwiseAnd &operator=(NEBitwiseAnd &&) = delete;
+ /** Default destructor */
+ ~NEBitwiseAnd() = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
index 15e12509c8..c66bebf7cc 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseNot : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's input and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input Input tensor. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
index 0e62620fc0..183df212e4 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseOr : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
index 1dcc6e2216..126aaa6ddd 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseXor : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
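
All four bitwise functions (AND/NOT/OR/XOR) share the same U8-only, border-free pattern; a sketch using NEBitwiseAnd, with an arbitrary shape:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void bitwise_and_example()
    {
        const TensorInfo info(TensorShape(32U, 32U), 1, DataType::U8); // U8 is the only supported type
        Tensor a, b, out;
        a.allocator()->init(info);
        b.allocator()->init(info);
        out.allocator()->init(info);

        NEBitwiseAnd band;
        band.configure(&a, &b, &out);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        band.run(); // out[i] = a[i] & b[i]
    }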
diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
index 27c1c5198b..aa41fc0df2 100644
--- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
+++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,23 +24,31 @@
#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORM_H
#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORM_H
-#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEBoundingBoxTransformKernel.
- *
- * This function calls the following Neon kernels:
- * -# @ref NEBoundingBoxTransformKernel
- */
-class NEBoundingBoxTransform : public INESimpleFunction
+/** Basic function to run @ref NEBoundingBoxTransformKernel. */
+class NEBoundingBoxTransform : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM16 |QASYMM8 |QASYMM16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
* @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
* @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
@@ -49,7 +57,8 @@ public:
*
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
*/
- void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
+ void
+ configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransform
*
@@ -63,7 +72,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
+ static Status validate(const ITensorInfo *boxes,
+ const ITensorInfo *pred_boxes,
+ const ITensorInfo *deltas,
+ const BoundingBoxTransformInfo &info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBOUNDINGBOXTRANSFORM_H */
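
A sketch of the transform over M = 16 proposals with a single class (K = 1); the image dimensions and the three-argument BoundingBoxTransformInfo constructor used here are assumptions for illustration:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void bbox_transform_example()
    {
        // boxes: (4, M); deltas and pred_boxes: (4*K, M) with K = 1.
        Tensor boxes, deltas, pred_boxes;
        boxes.allocator()->init(TensorInfo(TensorShape(4U, 16U), 1, DataType::F32));
        deltas.allocator()->init(TensorInfo(TensorShape(4U, 16U), 1, DataType::F32));
        pred_boxes.allocator()->init(TensorInfo(TensorShape(4U, 16U), 1, DataType::F32));

        NEBoundingBoxTransform bbt;
        bbt.configure(&boxes, &pred_boxes, &deltas, BoundingBoxTransformInfo(640.f, 480.f, 1.f));

        boxes.allocator()->allocate();
        deltas.allocator()->allocate();
        pred_boxes.allocator()->allocate();
        bbt.run();
    }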
diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h
deleted file mode 100644
index c382ea9114..0000000000
--- a/arm_compute/runtime/NEON/functions/NEBox3x3.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBOX3x3_H
-#define ARM_COMPUTE_NEBOX3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute box filter 3x3. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEBox3x3Kernel
- *
- */
-class NEBox3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's input, output and border mode.
- *
- * @note The border handler is run on the input tensor.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data type supported: U8.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
-};
-}
-#endif /*ARM_COMPUTE_NEBOX3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h
deleted file mode 100644
index 84cc2de6d6..0000000000
--- a/arm_compute/runtime/NEON/functions/NECannyEdge.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECANNYEDGE_H
-#define ARM_COMPUTE_NECANNYEDGE_H
-
-#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT)
- * -# @ref NESobel3x3 (if gradient_size == 3) or
- * @ref NESobel5x5 (if gradient_size == 5) or
- * @ref NESobel7x7 (if gradient_size == 7)
- * -# @ref NEGradientKernel
- * -# @ref NEEdgeNonMaxSuppressionKernel
- * -# @ref NEEdgeTraceKernel
- *
- */
-class NECannyEdge : public IFunction
-{
-public:
- /** Constructor
- *
- * Initialize Sobel kernel to nullptr.
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECannyEdge(const NECannyEdge &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECannyEdge &operator=(const NECannyEdge &) = delete;
- /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis.
- * @param[in] lower_thr Lower threshold used for the hysteresis.
- * @param[in] gradient_size Gradient size (3, 5 or 7)
- * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel */
- std::unique_ptr<INEKernel> _gradient; /**< Gradient kernel */
- NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */
- NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */
- NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
- NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */
- Tensor _gx; /**< Source tensor - Gx component */
- Tensor _gy; /**< Source tensor - Gy component */
- Tensor _magnitude; /**< Source tensor - Magnitude */
- Tensor _phase; /**< Source tensor - Phase */
- Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */
- ITensor *_output; /**< Output tensor provided by the user. */
-};
-}
-#endif /* ARM_COMPUTE_NECANNYEDGE_H */
diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h
index 55c21a01ec..43cae777f6 100644
--- a/arm_compute/runtime/NEON/functions/NECast.h
+++ b/arm_compute/runtime/NEON/functions/NECast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,47 +25,73 @@
#define ARM_COMPUTE_NECAST_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEDepthConvertLayerKernel.
+/** Basic function to run @ref cpu::kernels::CpuCastKernel.
* This function ignores the scale and zeroPoint of quantized tensors, so QASYMM8 input is treated as uint8 values.
*/
-class NECast : public INESimpleFunction
+class NECast : public IFunction
{
public:
+ /** Constructor */
+ NECast();
+ /** Destructor */
+ ~NECast();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECast(const NECast &) = delete;
+ /** Default move constructor */
+ NECast(NECast &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECast &operator=(const NECast &) = delete;
+ /** Default move assignment operator */
+ NECast &operator=(NECast &&);
/** Initialize the function's source, destination
*
- * Input data type must be different than output data type.
+ * Valid data layouts:
+ * - All
*
- * Valid conversions Input -> Output :
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-----------------------------------------------|
+ * |QASYMM8_SIGNED | S16, S32, F32, F16 |
+ * |QASYMM8 | U16, S16, S32, F32, F16 |
+ * |U8 | U16, S16, S32, F32, F16 |
+ * |U16 | U8, U32 |
+ * |S16 | QASYMM8_SIGNED, U8, S32 |
+ * |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 |
+ * |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 |
+ * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8|
*
- * - QASYMM8_SIGNED -> S16, S32, F32, F16
- * - QASYMM8 -> U16, S16, S32, F32, F16
- * - U8 -> U16, S16, S32, F32, F16
- * - U16 -> U8, U32
- * - S16 -> QASYMM8_SIGNED, U8, S32
- * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
- * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
- * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8
+ * Input data type must be different from the output data type.
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32.
- * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32.
* @param[in] policy Conversion policy.
*/
void configure(ITensor *input, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NECast
*
* @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32.
- * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32.
* @param[in] policy Conversion policy.
*
* @return a status
*/
- static Status validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NECAST_H*/
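
The new validate() signature takes const infos, which makes the usual validate-then-configure pattern straightforward. F32 to S32 below is one of the listed conversions; the shapes are arbitrary:

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NECast.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void cast_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::S32));

        const Status status = NECast::validate(src.info(), dst.info(), ConvertPolicy::SATURATE);
        if(status.error_code() == ErrorCode::OK)
        {
            NECast cast;
            cast.configure(&src, &dst, ConvertPolicy::SATURATE);
            src.allocator()->allocate();
            dst.allocator()->allocate();
            cast.run();
        }
    }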
diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h
deleted file mode 100644
index ba159160e0..0000000000
--- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELCOMBINE_H
-#define ARM_COMPUTE_NECHANNELCOMBINE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/**Basic function to run @ref NEChannelCombineKernel to perform channel combination. */
-class NEChannelCombine : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8
- * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- */
- void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output);
- /** Initialize function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[out] output The multi planar output image. Formats supported: NV12/NV21/IYUV/YUV444
- */
- void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECHANNELCOMBINE_H*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h
deleted file mode 100644
index 96ba1c8ecc..0000000000
--- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELEXTRACT_H
-#define ARM_COMPUTE_NECHANNELEXTRACT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */
-class NEChannelExtract : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function's source, destination
- *
- * @param[in] input The input tensor to extract the channel from. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted channel. Format supported: U8
- */
- void configure(const ITensor *input, Channel channel, ITensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted channel. Format supported: U8
- */
- void configure(const IMultiImage *input, Channel channel, IImage *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECHANNELEXTRACT_H*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
index 716518a8da..bc19e1a4af 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,12 +24,14 @@
#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYER_H
#define ARM_COMPUTE_NECHANNELSHUFFLELAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEChannelShuffleLayerKernel
*
@@ -42,6 +44,15 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - NCHW
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
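
A sketch with eight channels shuffled in four groups; any channel count that is a multiple of num_groups works, and the shapes below are illustrative:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void channel_shuffle_example()
    {
        // NCHW: 16x16 feature maps with 8 channels, shuffled in 4 groups.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));

        NEChannelShuffleLayer shuffle;
        shuffle.configure(&src, &dst, 4 /* num_groups */);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        shuffle.run();
    }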
diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h
deleted file mode 100644
index 5da0b91766..0000000000
--- a/arm_compute/runtime/NEON/functions/NECol2Im.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOL2IM_H
-#define ARM_COMPUTE_NECOL2IM_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NECol2Im */
-class NECol2Im : public INESimpleFunctionNoBorder
-{
-public:
- /** Configure the col2im NEON kernel
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
- /** Static function to check if given info will lead to a valid configuration of @ref NECol2Im
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECOL2IM_H */
diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h
deleted file mode 100644
index ee76db2787..0000000000
--- a/arm_compute/runtime/NEON/functions/NEColorConvert.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOLORCONVERT_H
-#define ARM_COMPUTE_NECOLORCONVERT_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-class IMultiImage;
-using IImage = ITensor;
-
-/**Basic function to run @ref NEColorConvertKernel to perform color conversion */
-class NEColorConvert : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function's source, destination
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const ITensor *input, ITensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const IMultiImage *input, IImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
- */
- void configure(const IImage *input, IMultiImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const IMultiImage *input, IMultiImage *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECOLORCONVERT_H*/
diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h
deleted file mode 100644
index 09c0c9d985..0000000000
--- a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H
-#define ARM_COMPUTE_NECOMPUTEALLANCHORS_H
-
-#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEComputeAllAnchorsKernel.
- *
- * This function calls the following NEON kernels:
- * -# @ref NEComputeAllAnchorsKernel
- */
-class NEComputeAllAnchors : public INESimpleFunction
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel
- *
- * @param[in] anchors Source tensor info. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
- * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECOMPUTEALLANCHORS_H */
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index 8207589680..1600f85488 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,10 @@
#ifndef ARM_COMPUTE_NECONCATENATELAYER_H
#define ARM_COMPUTE_NECONCATENATELAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Requires.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
-#include <vector>
namespace arm_compute
{
@@ -40,33 +36,49 @@ class ITensor;
class ITensorInfo;
class Status;
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
- */
+/** Basic function to execute concatenate tensors along a given axis */
class NEConcatenateLayer : public IFunction
{
public:
/** Default constructor */
NEConcatenateLayer();
+ /** Destructor */
+ ~NEConcatenateLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConcatenateLayer(const NEConcatenateLayer &) = delete;
+ /** Default move constructor */
+ NEConcatenateLayer(NEConcatenateLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConcatenateLayer &operator=(const NEConcatenateLayer &) = delete;
+ /** Default move assignment operator */
+ NEConcatenateLayer &operator=(NEConcatenateLayer &&);
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
+ * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
*
* @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
- void configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis);
void configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis);
/** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
*
* @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
+ * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
*
* @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
@@ -74,23 +86,15 @@ public:
*
* @return a status
*/
- static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
+ static Status
+ validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
// Inherited methods overridden:
void run() override;
private:
- template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)>
- void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis);
-
- template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)>
- static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);
-
-private:
- std::vector<std::unique_ptr<INEKernel>> _concat_kernels;
- unsigned int _num_inputs;
- unsigned int _axis;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONCATENATELAYER_H */
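
With the non-const overload removed, callers now pass a vector of const tensors. A width-axis (axis 0) sketch with arbitrary shapes:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <vector>

    using namespace arm_compute;

    void concat_example()
    {
        Tensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(4U, 8U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(6U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(10U, 8U), 1, DataType::F32)); // widths add up

        NEConcatenateLayer concat;
        std::vector<const ITensor *> srcs = {&a, &b};
        concat.configure(srcs, &dst, 0 /* axis 0: width */);

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();
        concat.run();
    }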
diff --git a/arm_compute/runtime/NEON/functions/NEConv3D.h b/arm_compute/runtime/NEON/functions/NEConv3D.h
new file mode 100644
index 0000000000..525f37f3e7
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEConv3D.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECONV3D_H
+#define ARM_COMPUTE_NECONV3D_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Basic function to simulate a 3d convolution. This function calls one of the following functions:
+ * -# @ref cpu::CpuDirectConv3d
+ *
+ */
+class NEConv3D : public IFunction
+{
+public:
+ /** Constructor */
+ NEConv3D();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConv3D(const NEConv3D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConv3D &operator=(const NEConv3D &) = delete;
+ /** Default move constructor */
+ NEConv3D(NEConv3D &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConv3D &operator=(NEConv3D &&) = default;
+ /** Default destructor */
+ ~NEConv3D();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NDHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. 4 lower dimensions represent a single input [IFM, width, height, depth],
+ * while every optional dimension from 5 and above represent a batch of inputs.
+ * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [OFM, IFM, kernel_x, kernel_y, kernel_z].
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * @param[out] output Destination tensor. 4 lower dimensions represent a single output [OFM, width, height, depth], while the rest represent batch of outputs.
+ * @param[in]  conv_info Contains padding, stride, activation information described in @ref Conv3dInfo.
+ */
+ void configure(
+ ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv3dInfo &conv_info);
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to NEConv3D::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const Conv3dInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NECONV3D_H */
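
A sketch of the new function in NDHWC with a 3x3x3 kernel; the shapes and the default-constructed Conv3dInfo (assumed to mean unit strides and no padding) are illustrative assumptions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEConv3D.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void conv3d_example()
    {
        auto make_info = [](const TensorShape &shape)
        {
            TensorInfo info(shape, 1, DataType::F32);
            info.set_data_layout(DataLayout::NDHWC); // only NDHWC is supported
            return info;
        };

        // src [IFM=4, W=8, H=8, D=8]; weights [OFM=16, IFM=4, 3, 3, 3]; biases [OFM=16].
        Tensor src, weights, biases, dst;
        src.allocator()->init(make_info(TensorShape(4U, 8U, 8U, 8U)));
        weights.allocator()->init(make_info(TensorShape(16U, 4U, 3U, 3U, 3U)));
        biases.allocator()->init(make_info(TensorShape(16U)));
        // With unit strides and no padding each spatial dimension shrinks by 2.
        dst.allocator()->init(make_info(TensorShape(16U, 6U, 6U, 6U)));

        NEConv3D conv;
        conv.configure(&src, &weights, &biases, &dst, Conv3dInfo{});

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();
        conv.run();
    }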
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index 42f787090e..dc6b22d717 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,31 +24,49 @@
#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
-#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/ITransformWeights.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */
+/** Basic function to run @ref cpu::kernels::CpuConvertFullyConnectedWeightsKernel. */
class NEConvertFullyConnectedWeights : public IFunction
{
public:
/** Default constructor */
NEConvertFullyConnectedWeights();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvertFullyConnectedWeights(const NEConvertFullyConnectedWeights &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvertFullyConnectedWeights &operator=(const NEConvertFullyConnectedWeights &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvertFullyConnectedWeights(NEConvertFullyConnectedWeights &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvertFullyConnectedWeights &operator=(NEConvertFullyConnectedWeights &&) = delete;
+ /** Default destructor */
+ ~NEConvertFullyConnectedWeights();
/** Initialize the function.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
*/
- void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ void
+ configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights
*
* @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
@@ -58,53 +76,17 @@ public:
*
* @return A Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const TensorShape &original_input_shape,
+ DataLayout data_layout);
// Inherited methods overridden:
void run() override;
private:
- NEConvertFullyConnectedWeightsKernel _kernel;
-};
-
-namespace weights_transformations
-{
-/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */
-class NEConvertFullyConnectedWeightsManaged : public ITransformWeights
-{
-public:
- void run() override
- {
- _output.allocator()->allocate();
- _func.run();
- _reshape_run = true;
- }
-
- void release() override
- {
- _output.allocator()->free();
- }
-
- ITensor *get_weights() override
- {
- return &_output;
- }
-
- uint32_t uid() override
- {
- return _uid;
- }
-
- void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
- {
- _func.configure(input, &_output, original_input_shape, data_layout);
- }
-
-private:
- static constexpr uint32_t _uid = 0x4;
- Tensor _output{};
- NEConvertFullyConnectedWeights _func{};
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-} // namespace weights_transformations
} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H */
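
A sketch of converting 2D fully connected weights trained against NCHW-shaped inputs; the 4x4x3 original input shape and the 48x10 weights matrix (48 = 4*4*3 flattened inputs, 10 outputs) are illustrative assumptions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void convert_fc_weights_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(48U, 10U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(48U, 10U), 1, DataType::F32));

        NEConvertFullyConnectedWeights convert;
        // The weights were trained against NCHW-shaped 4x4x3 inputs.
        convert.configure(&src, &dst, TensorShape(4U, 4U, 3U), DataLayout::NCHW);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        convert.run();
    }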
diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h
deleted file mode 100644
index c297589013..0000000000
--- a/arm_compute/runtime/NEON/functions/NEConvolution.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVOLUTION_H
-#define ARM_COMPUTE_NECONVOLUTION_H
-
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolution3x3Kernel
- *
- */
-class NEConvolution3x3 : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8/S16.
- * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-
-/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolutionKernel or<br/>
- * @ref NESeparableConvolutionHorKernel and @ref NESeparableConvolutionVertKernel (if convolution matrix is separable)
- *
- */
-template <unsigned int matrix_size>
-class NEConvolutionSquare : public IFunction
-{
-public:
- /** Default constructor */
- NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function memory group */
- Tensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- NESeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- NESeparableConvolutionVertKernel<matrix_size> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- NEConvolutionKernel<matrix_size> _kernel; /**< kernel for non-separated convolution **/
- NEFillBorderKernel _border_handler; /**< kernel for border handling */
-};
-
-/** Basic function to run 5x5 convolution. */
-using NEConvolution5x5 = NEConvolutionSquare<5>;
-/** Basic function to run 7x7 convolution. */
-using NEConvolution7x7 = NEConvolutionSquare<7>;
-/** Basic function to run 9x9 convolution. */
-using NEConvolution9x9 = NEConvolutionSquare<9>;
-
-/** Basic function to execute non-square convolution. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolutionRectangleKernel or<br/>
- *
- * @note Convolution rectangle should have dimensions of 3, 5, 7, 9
- */
-class NEConvolutionRectangle : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] rows Rows of convolution kernel.
- * @param[in] cols Columns of convolution kernel.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NECONVOLUTION_H */
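Since this header is deleted outright, a reconstructed sketch of what legacy callers looked like may help when porting; it follows the doc comments above (row-major S16 coefficients, and scale 0 meaning "sum of coefficients, or 1 if they sum to 0"). The tensors are assumed U8, pre-initialised:

    // Legacy API removed by this change; sketch for porting reference only.
    const int16_t gaussian3x3[9] =
    {
        1, 2, 1,
        2, 4, 2,
        1, 2, 1
    };

    NEConvolution3x3 conv3x3;
    conv3x3.configure(&src, &dst, gaussian3x3, 16 /* scale == sum of coefficients */, BorderMode::CONSTANT, 0);
    conv3x3.run();
    // Passing scale = 0 would derive the same divisor automatically.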
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index b76695b80c..2d07980ade 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,27 +21,26 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NECONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NECONVOLUTIONLAYER_H
-
-#include "arm_compute/runtime/IFunction.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
+
#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
-/** Basic function to simulate a convolution layer. This function calls one of the following NEON functions:
- * -# @ref NEGEMMConvolutionLayer (executed only in case GEMM is required for the operation)
- * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
- * -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation)
+/** Basic function to simulate a convolution layer. This function calls one of the following functions:
+ * -# @ref cpu::CpuGemmConv2d (executed only in case GEMM is required for the operation)
+ * -# @ref cpu::CpuWinogradConv2d (executed only in case Winograd is required for the operation)
+ * -# @ref cpu::CpuDirectConv2d (executed only in case Direct Convolution is required for the operation)
* -# @ref NEFFTConvolutionLayer (executed only in case FFT is required for the operation)
*
*
@@ -75,41 +74,74 @@ class NEConvolutionLayer : public IFunction
public:
/** Constructor */
NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayer(const NEConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEConvolutionLayer(NEConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionLayer &operator=(NEConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~NEConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input; it can also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
* @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
* available, which may also introduce a drop in accuracy. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input; it can also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
@@ -118,20 +150,28 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
- unsigned int num_groups = 1);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input; it can also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
@@ -139,15 +179,21 @@ public:
*
* @return the Convolution Method Hint
*/
- static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ static ConvolutionMethod get_convolution_method(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- std::shared_ptr<IMemoryManager> _memory_manager;
- std::unique_ptr<IFunction> _function; /**< Function to run */
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-}
-#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */ \ No newline at end of file
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
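Caller-side, the interface is unchanged by the move to the pimpl. A minimal sketch with assumed F32 NCHW shapes (illustrative, not taken from the patch):

    using namespace arm_compute;

    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32));  // [W, H, IFM, N]
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 64U), 1, DataType::F32)); // [kx, ky, IFM, OFM]
    biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(224U, 224U, 64U, 1U), 1, DataType::F32));

    const PadStrideInfo conv_info(1, 1, 1, 1); // stride 1, pad 1: same-size output

    // Optionally ask which path configure() would select: GEMM, Winograd, Direct or FFT.
    ConvolutionMethod method = NEConvolutionLayer::get_convolution_method(src.info(), weights.info(), dst.info(), conv_info);
    ARM_COMPUTE_UNUSED(method);

    NEConvolutionLayer conv;
    if(NEConvolutionLayer::validate(src.info(), weights.info(), biases.info(), dst.info(), conv_info).error_code() == ErrorCode::OK)
    {
        conv.configure(&src, &weights, &biases, &dst, conv_info);
    }

    // ... allocate tensors and fill src/weights/biases ...
    conv.prepare(); // one-off weight transformations
    conv.run();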
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index b03f408ad8..840c03e968 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,18 +25,41 @@
#define ARM_COMPUTE_NECOPY_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NECopyKernel */
-class NECopy : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuCopyKernel */
+class NECopy : public IFunction
{
public:
+ /** Default Constructor */
+ NECopy();
+ /** Default Destructor */
+ ~NECopy();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECopy(const NECopy &) = delete;
+ /** Default move constructor */
+ NECopy(NECopy &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECopy &operator=(const NECopy &) = delete;
+ /** Default move assignment operator */
+ NECopy &operator=(NECopy &&);
/** Initialise the function's source and destination.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
@@ -50,6 +73,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NECOPY_H */
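NECopy keeps its one-line usage; only the backing kernel changes. A minimal sketch with assumed shapes:

    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    NECopy copy;
    copy.configure(&src, &dst); // dispatches cpu::kernels::CpuCopyKernel internally

    src.allocator()->allocate();
    dst.allocator()->allocate();
    copy.run();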
diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h
index 1c15beded4..f806762158 100644
--- a/arm_compute/runtime/NEON/functions/NECropResize.h
+++ b/arm_compute/runtime/NEON/functions/NECropResize.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_NEON_CROP_RESIZE_H
#define ARM_COMPUTE_NEON_CROP_RESIZE_H
-#include "arm_compute/core/NEON/kernels/NECropKernel.h"
#include "arm_compute/runtime/NEON/functions/NEScale.h"
#include <memory>
@@ -32,7 +31,9 @@
namespace arm_compute
{
// Forward Declarations
+class Tensor;
class ITensor;
+class NECropKernel;
/** Function to perform cropping and resizing */
class NECropResize : public IFunction
@@ -49,10 +50,18 @@ public:
/** Allow instances of this class to be moved */
NECropResize &operator=(NECropResize &&) = default;
/** Default destructor */
- virtual ~NECropResize() = default;
+ ~NECropResize();
/** Configure kernel
*
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------|:--------|:------|:--------|
+ * |All |F32 |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
* @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
* @note Start and end indices of boxes are inclusive.
@@ -66,8 +75,13 @@ public:
* @param[in] method The policy to be used when resizing image. Default is bilinear.
* @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0.
*/
- void configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size,
- InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
+ void configure(const ITensor *input,
+ const ITensor *boxes,
+ const ITensor *box_ind,
+ ITensor *output,
+ Coordinates2D crop_size,
+ InterpolationPolicy method = InterpolationPolicy::BILINEAR,
+ float extrapolation_value = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NESlice
*
@@ -87,8 +101,13 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output,
- Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *boxes,
+ const ITensorInfo *box_ind,
+ const ITensorInfo *output,
+ Coordinates2D crop_size,
+ InterpolationPolicy method,
+ float extrapolation_value);
void run() override;
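A caller-side sketch of the reflowed configure() overload; the tensors and box layout here are assumptions for illustration, following the data-type table above (boxes and box_ind are F32, output is F32):

    // input, boxes, box_ind and output are assumed pre-initialised Tensors.
    NECropResize crop;
    crop.configure(&input, &boxes, &box_ind, &output,
                   Coordinates2D{64, 64},         // crop_size
                   InterpolationPolicy::BILINEAR,
                   0.f /* extrapolation_value */);
    crop.run();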
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index e2ed0e0abc..aabe42f928 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,15 +24,14 @@
#ifndef ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
-#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
-#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -64,11 +63,10 @@ namespace arm_compute
* The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
* reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
*
- * This function calls the following NEON kernels/functions:
+ * This function calls the following kernels/functions:
*
* -# @ref CPPUpsample
* -# @ref NEConvolutionLayer
- * -# @ref NEPermute
* -# @ref NEReverse
*
*/
@@ -77,39 +75,77 @@ class NEDeconvolutionLayer : public IFunction
public:
/** Constructor */
NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete;
- /** Allow instances of this class to be moved */
- NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default;
- /** Allow instances of this class to be moved */
+ /** Default move assignment operator */
NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default;
/** Default destructor */
- virtual ~NEDeconvolutionLayer() = default;
+ ~NEDeconvolutionLayer() = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input; it can also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension.
+ * Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+ * available, which may also introduce a drop in accuracy. Default is false
+ * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for
+ * the GEMM convolution.
*
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *bias,
+ ITensor *output,
+ const PadStrideInfo &info,
+ bool enable_fast_math = false,
+ const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input; it can also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] bias (Optional) The biases have one dimension. Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+ * available, which may also introduce a drop in accuracy. Default is false
+ * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for
+ * the GEMM convolution.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const PadStrideInfo &info,
+ bool enable_fast_math = false,
+ const WeightsInfo &weights_info = WeightsInfo());
// Inherited methods overridden:
void run() override;
@@ -127,6 +163,7 @@ private:
ITensor *_input;
PadStrideInfo _info;
bool _is_prepared;
+ bool _do_upsampling;
};
-} // arm_compute
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */
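The two new optional parameters slot in after the pad/stride information. A sketch with assumed pre-initialised F32 tensors:

    // src, weights, bias and dst are assumed pre-initialised Tensors.
    NEDeconvolutionLayer deconv;
    deconv.configure(&src, &weights, &bias, &dst,
                     PadStrideInfo(2, 2, 1, 1), // stride 2: spatial upsampling
                     true,                      // enable_fast_math (new in this signature)
                     WeightsInfo());            // weight format hint, default unspecified
    deconv.run();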
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index b784480887..7bfdfbd13d 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,35 +25,48 @@
#define ARM_COMPUTE_NEDEPTHCONVERT_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
-#include <cstdint>
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/**Basic function to run @ref NEDepthConvertLayerKernel */
-class NEDepthConvertLayer : public INESimpleFunctionNoBorder
+/**Basic function to run @ref cpu::kernels::CpuCastKernel */
+class NEDepthConvertLayer : public IFunction
{
public:
- /* Contructor */
- NEDepthConvertLayer() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ /** Constructor */
+ NEDepthConvertLayer();
+ /** Destructor */
+ ~NEDepthConvertLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
NEDepthConvertLayer(const NEDepthConvertLayer &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete;
+ /** Default move constructor */
+ NEDepthConvertLayer(NEDepthConvertLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete;
+ /** Default move assignment operator */
+ NEDepthConvertLayer &operator=(NEDepthConvertLayer &&);
/** Initialize the function's source, destination
*
- * Valid conversions Input -> Output :
+ * Valid data layouts:
+ * - All
*
- * - QASYMM8 -> F16, F32
- * - U8 -> U16, S16, S32
- * - U16 -> U8, U32
- * - S16 -> U8, S32
- * - BFLOAT16 -> F32
- * - F16 -> QASYMM8, F32
- * - F32 -> QASYMM8, F16, BFLOAT16
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------------------|
+ * |QASYMM8 | F16, F32 |
+ * |U8 | U16, S16, S32 |
+ * |U16 | U8, U32 |
+ * |S16 | U8, S32 |
+ * |BFLOAT16 | F32 |
+ * |F16 | QASYMM8, F32 |
+ * |F32 | QASYMM8, F16, BFLOAT16 |
+ *
+ * The input data type must be different from the output data type.
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
* @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
@@ -70,7 +83,15 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEDEPTHCONVERT_H*/
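A minimal sketch of one conversion from the table above (U8 to S16), with illustrative shapes:

    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::S16)); // U8 -> S16 is a valid pair

    NEDepthConvertLayer convert;
    convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0 /* shift */);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    convert.run();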
diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
index 3c21d1a33a..d27369670e 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,26 +21,48 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYER_H
-#define ARM_COMPUTE_NEDEPTHTOSPACELAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
+class NEDepthToSpaceLayerKernel;
/** Basic function to run @ref NEDepthToSpaceLayerKernel. */
-class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder
+class NEDepthToSpaceLayer : public IFunction
{
public:
+ /** Constructor */
+ NEDepthToSpaceLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayer &operator=(const NEDepthToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDepthToSpaceLayer(NEDepthToSpaceLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete;
+ /** Default destructor */
+ ~NEDepthToSpaceLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
@@ -55,6 +77,11 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ void run() override;
+
+private:
+ std::unique_ptr<NEDepthToSpaceLayerKernel> _kernel;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
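For block_shape b, the depth is divided by b*b while width and height are multiplied by b. A sketch with assumed NCHW F32 shapes:

    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U, 1U), 1, DataType::F32));  // [W, H, C, N]
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 4U, 1U), 1, DataType::F32)); // C 16 -> 4, W/H doubled

    NEDepthToSpaceLayer d2s;
    d2s.configure(&src, &dst, 2 /* block_shape */);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    d2s.run();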
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 811dc82843..6ad5aa7bfa 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,18 +24,18 @@
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class NEDepthwiseConvolutionLayerNativeKernel;
/** Function to execute a depthwise convolution.
*/
@@ -52,8 +52,24 @@ public:
NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
/** Default move assignment operator */
NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~NEDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
* @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
@@ -65,8 +81,14 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
*
@@ -83,40 +105,27 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a Depthwise Convolution Function
- */
- static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
-
- /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
+ /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
*
* @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
*
* -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
* -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
- * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
+ * -# @ref cpu::CpuDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
* -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
* -# @ref NEActivationLayer if fused activation is required
*
@@ -131,9 +140,13 @@ private:
/** Default move constructor */
NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
+ NEDepthwiseConvolutionLayerOptimizedInternal &
+ operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
/** Default move assignment operator */
- NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
+ NEDepthwiseConvolutionLayerOptimizedInternal &
+ operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
+ /** Default destructor */
+ ~NEDepthwiseConvolutionLayerOptimizedInternal() = default;
/** Initialize the function's source, destination, kernels and border_size.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
@@ -146,8 +159,14 @@ private:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
*
@@ -163,71 +182,26 @@ private:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- /** Configure the kernels/functions for the generic pipeline.
- *
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
- /** Configure the kernels/functions for the optimized pipeline.
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
- */
- void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
- /** Run generic kernel */
- void run_generic();
- /** Run optimized function */
- void run_optimized();
-
- MemoryGroup _memory_group;
- NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;
- NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEFillBorderKernel _border_handler;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- const ITensor *_original_weights;
- bool _has_bias;
- bool _is_quantized;
- bool _is_optimized;
- bool _is_nchw;
- bool _permute;
- bool _is_activationlayer_enabled;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
- /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
+ /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
*
* -# @ref NEDepthwiseConvolutionLayerNativeKernel
*
@@ -245,6 +219,8 @@ private:
NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
/** Default move assignment operator */
NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
+ /** Default destructor */
+ ~NEDepthwiseConvolutionLayerGeneric() = default;
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
@@ -258,8 +234,14 @@ private:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
*
@@ -276,32 +258,25 @@ private:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
- void prepare() override;
private:
- NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
- NEFillBorderKernel _fill_border;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_nchw;
- bool _is_activationlayer_enabled;
- const ITensor *_original_weights;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-
- DepthwiseConvolutionFunction _depth_conv_func;
- NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
- NEDepthwiseConvolutionLayerGeneric _func_generic;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */ \ No newline at end of file
+#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
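Caller-side the function is unchanged; the choice between the optimized and generic internals now happens behind the Impl. A sketch with assumed pre-initialised tensors and a fused ReLU:

    // src, weights, biases and dst are assumed pre-initialised Tensors.
    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&src, &weights, &biases, &dst,
                  PadStrideInfo(1, 1, 1, 1),
                  1 /* depth_multiplier */,
                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                  Size2D(1U, 1U) /* dilation */);
    dwc.prepare(); // one-off weight permutation/packing
    dwc.run();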
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index f8d0ce8b2d..8b49930ef5 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,33 +24,66 @@
#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H
#define ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */
-class NEDequantizationLayer : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::CpuDequantize that dequantizes an input tensor */
+class NEDequantizationLayer : public IFunction
{
public:
+ /** Default Constructor */
+ NEDequantizationLayer();
+ /** Default Destructor */
+ ~NEDequantizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDequantizationLayer(const NEDequantizationLayer &) = delete;
+ /** Default move constructor */
+ NEDequantizationLayer(NEDequantizationLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDequantizationLayer &operator=(const NEDequantizationLayer &) = delete;
+ /** Default move assignment operator */
+ NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default;
/** Configure the kernel.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:-----------|
+ * |QASYMM8 |F16, F32 |
+ * |QASYMM8_SIGNED |F16, F32 |
+ * |QSYMM8_PER_CHANNEL |F16, F32 |
+ * |QSYMM8 |F16, F32 |
+ * |QSYMM16 |F16, F32 |
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayer
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[in] output Output tensor info. Data type supported: F16/F32.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H */
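A sketch dequantizing a QASYMM8 tensor to F32; the quantization parameters and shape are illustrative:

    using namespace arm_compute;

    Tensor q, f;
    q.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 128)));
    f.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));

    NEDequantizationLayer dq;
    dq.configure(&q, &f); // element-wise: f = 0.25f * (q - 128)

    q.allocator()->allocate();
    f.allocator()->allocate();
    dq.run();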
diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h
deleted file mode 100644
index 65d0654612..0000000000
--- a/arm_compute/runtime/NEON/functions/NEDerivative.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDERIVATIVE_H
-#define ARM_COMPUTE_NEDERIVATIVE_H
-
-#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute first order derivative operator. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEDerivativeKernel
- *
- */
-class NEDerivative : public IFunction
-{
-public:
- /** Default constructor */
- NEDerivative();
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data type supported: S16.
- * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data type supported: S16.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEDerivativeKernel _kernel; /**< Derivative kernel */
- NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
-};
-}
-#endif /* ARM_COMPUTE_NEDERIVATIVE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
index d616762a5a..7a94833d10 100644
--- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,13 +24,12 @@
#ifndef ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H
#define ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CPP/functions/CPPDetectionPostProcessLayer.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/runtime/Tensor.h"
#include <map>
@@ -53,11 +52,23 @@ public:
NEDetectionPostProcessLayer(const NEDetectionPostProcessLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDetectionPostProcessLayer &operator=(const NEDetectionPostProcessLayer &) = delete;
+ /** Default destructor */
+ ~NEDetectionPostProcessLayer() = default;
/** Configure the detection output layer NE function
*
- * @param[in] input_box_encoding The bounding box input tensor. Data types supported: F32, QASYMM8.
- * @param[in] input_score The class prediction input tensor. Data types supported: Same as @p input_box_encoding.
- * @param[in] input_anchors The anchors input tensor. Data types supported: Same as @p input_box_encoding.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src2 |dst0 - dst3 |
+ * |:--------------|:--------------|
+ * |QASYMM8 |F32 |
+ * |QASYMM8_SIGNED |F32 |
+ * |F32 |F32 |
+ *
+ * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32.
+ * @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding.
+ * @param[in] input_anchors The anchors input tensor. Data types supported: same as @p input_box_encoding.
* @param[out] output_boxes The boxes output tensor. Data types supported: F32.
* @param[out] output_classes The classes output tensor. Data types supported: Same as @p output_boxes.
* @param[out] output_scores The scores output tensor. Data types supported: Same as @p output_boxes.
@@ -66,23 +77,34 @@ public:
*
* @note Output contains all the detections. Of those, only the entries covered by the valid region are meaningful.
*/
- void configure(const ITensor *input_box_encoding, const ITensor *input_score, const ITensor *input_anchors,
- ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
+ void configure(const ITensor *input_box_encoding,
+ const ITensor *input_score,
+ const ITensor *input_anchors,
+ ITensor *output_boxes,
+ ITensor *output_classes,
+ ITensor *output_scores,
+ ITensor *num_detection,
+ DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDetectionPostProcessLayer
*
- * @param[in] input_box_encoding The bounding box input tensor info. Data types supported: F32, QASYMM8.
- * @param[in] input_class_score The class prediction input tensor info. Data types supported: F32, QASYMM8.
- * @param[in] input_anchors The anchors input tensor. Data types supported: F32, QASYMM8.
- * @param[out] output_boxes The output tensor. Data types supported: F32.
- * @param[out] output_classes The output tensor. Data types supported: Same as @p output_boxes.
- * @param[out] output_scores The output tensor. Data types supported: Same as @p output_boxes.
- * @param[out] num_detection The number of output detection. Data types supported: Same as @p output_boxes.
- * @param[in] info (Optional) DetectionPostProcessLayerInfo information.
+ * @param[in] input_box_encoding The bounding box input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32.
+ * @param[in] input_class_score The class prediction input tensor info. Data types supported: same as @p input_box_encoding.
+ * @param[in] input_anchors The anchors input tensor info. Data types supported: same as @p input_box_encoding.
+ * @param[in] output_boxes The output tensor info. Data types supported: F32.
+ * @param[in] output_classes The output tensor info. Data types supported: Same as @p output_boxes.
+ * @param[in] output_scores The output tensor info. Data types supported: Same as @p output_boxes.
+ * @param[in]  num_detection      Tensor info for the number of output detections. Data types supported: Same as @p output_boxes.
+ * @param[in] info (Optional) DetectionPostProcessLayerInfo information.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input_box_encoding, const ITensorInfo *input_class_score, const ITensorInfo *input_anchors,
- ITensorInfo *output_boxes, ITensorInfo *output_classes, ITensorInfo *output_scores, ITensorInfo *num_detection,
+ static Status validate(const ITensorInfo *input_box_encoding,
+ const ITensorInfo *input_class_score,
+ const ITensorInfo *input_anchors,
+ ITensorInfo *output_boxes,
+ ITensorInfo *output_classes,
+ ITensorInfo *output_scores,
+ ITensorInfo *num_detection,
DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
// Inherited methods overridden:
void run() override;
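
The seven-tensor signature is easiest to see through the static checker. A sketch follows; every shape below (batch 1, 1917 anchors, 90 classes, 10 max detections) is an illustrative assumption, and validate() remains the authoritative check:

#include "arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h"

#include <iostream>

using namespace arm_compute;

int main()
{
    const unsigned int num_anchors = 1917, num_classes = 90, max_det = 10;
    TensorInfo boxes_in(TensorShape(4U, num_anchors, 1U), 1, DataType::F32);
    TensorInfo scores_in(TensorShape(num_classes + 1, num_anchors, 1U), 1, DataType::F32);
    TensorInfo anchors(TensorShape(4U, num_anchors), 1, DataType::F32);
    TensorInfo boxes_out(TensorShape(4U, max_det, 1U), 1, DataType::F32);
    TensorInfo classes_out(TensorShape(max_det, 1U), 1, DataType::F32);
    TensorInfo scores_out(TensorShape(max_det, 1U), 1, DataType::F32);
    TensorInfo num_det(TensorShape(1U), 1, DataType::F32);

    // Uses the default DetectionPostProcessLayerInfo; prints the error description if rejected.
    const Status st = NEDetectionPostProcessLayer::validate(&boxes_in, &scores_in, &anchors, &boxes_out,
                                                            &classes_out, &scores_out, &num_det);
    std::cout << st.error_description() << std::endl;
    return 0;
}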
diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h
deleted file mode 100644
index 39a37af74c..0000000000
--- a/arm_compute/runtime/NEON/functions/NEDilate.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDILATE_H
-#define ARM_COMPUTE_NEDILATE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute dilate. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEDilateKernel
- *
- */
-class NEDilate : public INESimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output and border mode.
- *
- * @param[in, out] input First tensor input. Data type supported: U8.(Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEDILATE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 68454be88f..3ae3b2a15c 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,35 +24,51 @@
#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
+class ITensor;
+class ITensorInfo;
/** Function to run the direct convolution.
*
- * This function calls the following NEON kernels:
+ * This function calls the following:
*
- * -# @ref NEFillBorderKernel for the input
- * -# @ref NEDirectConvolutionLayerOutputStageKernel
- * -# @ref NEDirectConvolutionLayerKernel
+ * -# @ref cpu::CpuDirectConv2d
*/
class NEDirectConvolutionLayer : public IFunction
{
public:
/** Constructor */
NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayer(const NEDirectConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayer &operator=(const NEDirectConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDirectConvolutionLayer(NEDirectConvolutionLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDirectConvolutionLayer &operator=(NEDirectConvolutionLayer &&) = delete;
+ /** Default destructor */
+ ~NEDirectConvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @note: DirectConvolution only works in the following configurations:
* 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
* 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
@@ -69,7 +85,12 @@ public:
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *bias,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer
*
* @note: DirectConvolution only works in the following configurations:
@@ -90,22 +111,20 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEDirectConvolutionLayerKernel _conv_kernel;
- NEFillBorderKernel _input_border_handler;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- bool _has_bias;
- bool _is_activationlayer_enabled;
- unsigned int _dim_split;
+ struct Impl;
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */
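
Under the note's constraints, a sketch of one documented configuration (3x3 kernel, unit stride, F32, NCHW); the 32x32x3 input, 8 filters, and pad of 1 are assumptions for illustration:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void direct_conv_sketch()
{
    // NCHW: 32x32 image, 3 input channels, 8 filters of size 3x3; pad 1 keeps the spatial size.
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));

    NEDirectConvolutionLayer conv; // forwards to cpu::CpuDirectConv2d internally
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1, pad 1

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();
    conv.run();
}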
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index cac105cdb9..ebf2277d1f 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,29 +25,59 @@
#define ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
namespace arm_compute
{
class ITensor;
-/** Basic function to run @ref NEArithmeticOperationKernel for max
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max
*
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a max operation between two tensors.
*/
-class NEElementwiseMax : public INESimpleFunction
+class NEElementwiseMax : public IFunction
{
public:
+ /** Default Constructor */
+ NEElementwiseMax();
+ /** Default Destructor */
+ ~NEElementwiseMax();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseMax(const NEElementwiseMax &) = delete;
+ /** Default move constructor */
+ NEElementwiseMax(NEElementwiseMax &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseMax &operator=(const NEElementwiseMax &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseMax &operator=(NEElementwiseMax &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -56,26 +86,64 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEArithmeticOperationKernel for min
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min
*
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a min operation between two tensors.
*/
-class NEElementwiseMin : public INESimpleFunction
+class NEElementwiseMin : public IFunction
{
public:
+ /** Default Constructor */
+ NEElementwiseMin();
+ /** Default Destructor */
+ ~NEElementwiseMin();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseMin(const NEElementwiseMin &) = delete;
+ /** Default move constructor */
+ NEElementwiseMin(NEElementwiseMin &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseMin &operator=(const NEElementwiseMin &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseMin &operator=(NEElementwiseMin &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -84,26 +152,64 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEArithmeticOperationKernel for squared difference
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference
*
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2)
*/
-class NEElementwiseSquaredDiff : public INESimpleFunction
+class NEElementwiseSquaredDiff : public IFunction
{
public:
+ /** Default Constructor */
+ NEElementwiseSquaredDiff();
+ /** Default Destructor */
+ ~NEElementwiseSquaredDiff();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseSquaredDiff(const NEElementwiseSquaredDiff &) = delete;
+ /** Default move constructor */
+ NEElementwiseSquaredDiff(NEElementwiseSquaredDiff &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseSquaredDiff &operator=(const NEElementwiseSquaredDiff &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -112,26 +218,60 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEArithmeticOperationKernel for division
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division
*
* @note The tensor data type for the inputs must be F16/F32.
* @note The function performs an elementwise division between two tensors (i.e., out[i] = in1[i] / in2[i])
*/
-class NEElementwiseDivision : public INESimpleFunction
+class NEElementwiseDivision : public IFunction
{
public:
+ /** Default Constructor */
+ NEElementwiseDivision();
+ /** Default Destructor */
+ ~NEElementwiseDivision();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseDivision(const NEElementwiseDivision &) = delete;
+ /** Default move constructor */
+ NEElementwiseDivision(NEElementwiseDivision &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseDivision &operator=(const NEElementwiseDivision &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseDivision &operator=(NEElementwiseDivision &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -140,27 +280,61 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEArithmeticOperationKernel for power
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for power
*
* @note The tensor data type for the inputs must be F16/F32.
* @note The function performs an elementwise power operation (i.e., out[i] = in1[i] ^ in2[i])
* @note For an exponent that is a float, this function will only work with a positive base.
*/
-class NEElementwisePower : public INESimpleFunction
+class NEElementwisePower : public IFunction
{
public:
+ /** Default Constructor */
+ NEElementwisePower();
+ /** Default Destructor */
+ ~NEElementwisePower();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwisePower(const NEElementwisePower &) = delete;
+ /** Default move constructor */
+ NEElementwisePower(NEElementwisePower &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwisePower &operator=(const NEElementwisePower &) = delete;
+ /** Default move assignment operator */
+ NEElementwisePower &operator=(NEElementwisePower &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for power
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -169,62 +343,125 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEComparisonOperationKernel.
+/** Basic function to run @ref cpu::kernels::CpuComparisonKernel.
*
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a comparison operation between two tensors.
*/
-class NEElementwiseComparison : public INESimpleFunction
+class NEElementwiseComparison : public IFunction
{
public:
+ /** Default Constructor */
+ NEElementwiseComparison();
+ /** Default Destructor */
+ ~NEElementwiseComparison();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseComparison(const NEElementwiseComparison &) = delete;
+ /** Default move constructor */
+ NEElementwiseComparison(NEElementwiseComparison &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseComparison &operator=(const NEElementwiseComparison &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseComparison &operator=(NEElementwiseComparison &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:-----|
+ * |QASYMM8 |QASYMM8 |U8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |U8 |
+ * |S32 |S32 |U8 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |U8 |
+ * |F16 |F16 |U8 |
+ * |F32 |F32 |U8 |
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[out] output Output tensor. Data types supported: U16/U32.
+ * @param[out] output Output tensor. Data types supported: U8.
* @param[in] op Comparison Operation to be performed.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U16/U32.
+ * @param[in] output Output tensor info. Data types supported: U8.
* @param[in] op Comparison Operation to be performed.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+ static Status
+ validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEComparisonOperationKernel
+/** Basic function to run @ref cpu::kernels::CpuComparisonKernel
*
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a comparison operation between two tensors.
*/
template <ComparisonOperation op>
-class NEElementwiseComparisonStatic : public INESimpleFunction
+class NEElementwiseComparisonStatic : public IFunction
{
public:
+ /** Default Constructor */
+ NEElementwiseComparisonStatic();
+ /** Default Destructor */
+ ~NEElementwiseComparisonStatic();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseComparisonStatic(const NEElementwiseComparisonStatic &) = delete;
+ /** Default move constructor */
+ NEElementwiseComparisonStatic(NEElementwiseComparisonStatic &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseComparisonStatic &operator=(const NEElementwiseComparisonStatic &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseComparisonStatic &operator=(NEElementwiseComparisonStatic &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: U8.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[in] output Output tensor info. Data types supported: U8.
*
* @return a status
*/
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
/** Basic function to run equal comparison. */
@@ -239,5 +476,6 @@ using NEGreaterEqual = NEElementwiseComparisonStatic<ComparisonOperation::Greate
using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>;
/** Basic function to run less-equal comparison. */
using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H */
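
All of these classes now share the same shape: construct, configure, run. A sketch combining a binary arithmetic op with one of the static comparison aliases; the 8x8 F32 shapes are assumed:

#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void elementwise_sketch()
{
    Tensor a, b, max_out, cmp_out;
    const TensorInfo f32_info(TensorShape(8U, 8U), 1, DataType::F32);
    a.allocator()->init(f32_info);
    b.allocator()->init(f32_info);
    max_out.allocator()->init(f32_info);
    cmp_out.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::U8)); // comparisons write U8

    NEElementwiseMax max_op;
    max_op.configure(&a, &b, &max_out); // act_info defaults to no fused activation

    NEGreaterEqual cmp; // NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>
    cmp.configure(&a, &b, &cmp_out);

    a.allocator()->allocate();
    b.allocator()->allocate();
    max_out.allocator()->allocate();
    cmp_out.allocator()->allocate();
    max_op.run();
    cmp.run();
}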
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
index 094f875b35..63e47b8377 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,150 +24,73 @@
#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
#define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
-
-/** Basic function to perform inverse square root on an input tensor. */
-class NERsqrtLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to perform exponential on an input tensor. */
-class NEExpLayer : public INESimpleFunction
+class ITensorInfo;
+/** Basic function to perform unary elementwise operations */
+template <ElementWiseUnary op>
+class NEElementwiseUnaryLayer : public IFunction
{
public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEExpLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
+ /** Default Constructor */
+ NEElementwiseUnaryLayer();
+ /** Default Destructor */
+ ~NEElementwiseUnaryLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryLayer(const NEElementwiseUnaryLayer &) = delete;
+ /** Default move constructor */
+ NEElementwiseUnaryLayer(NEElementwiseUnaryLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryLayer &operator=(const NEElementwiseUnaryLayer &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseUnaryLayer &operator=(NEElementwiseUnaryLayer &&);
-/** Basic function to negate an input tensor. */
-class NENegLayer : public INESimpleFunction
-{
-public:
/** Initialize the function
*
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NENegLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ * Valid data layouts:
+ * - All
*
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to compute the natural logarithm of an input tensor. */
-class NELogLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |S32 |S32 |
*
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
+ * @param[in]  input  Input tensor. Data types supported: F16/F32 (F16/F32/S32 for the NEG/ABS operations).
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
*/
void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NELogLayer
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
+ * @param[in] input  Input tensor info. Data types supported: F16/F32 (F16/F32/S32 for the NEG/ABS operations).
* @param[in] output Output tensor info. Data types supported: Same as @p input.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
+ // Inherited methods overridden:
+ void run() override;
-/** Basic function to compute the absolute value of an input tensor. */
-class NEAbsLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEAbsLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to compute the round value elementwise of an input tensor. */
-class NERoundLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NERoundLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
+using NERsqrtLayer = NEElementwiseUnaryLayer<ElementWiseUnary::RSQRT>;
+using NEExpLayer = NEElementwiseUnaryLayer<ElementWiseUnary::EXP>;
+using NENegLayer = NEElementwiseUnaryLayer<ElementWiseUnary::NEG>;
+using NELogLayer = NEElementwiseUnaryLayer<ElementWiseUnary::LOG>;
+using NEAbsLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ABS>;
+using NERoundLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ROUND>;
+using NESinLayer = NEElementwiseUnaryLayer<ElementWiseUnary::SIN>;
-/** Basic function to compute the sine of an input tensor. */
-class NESinLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NESinLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H */
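
The aliases keep existing call sites compiling while routing every operation through the single template. A sketch using NEAbsLayer on S32 data; the length-64 shape is an assumption:

#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void abs_sketch()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::S32)); // S32 is listed for ABS/NEG
    dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::S32));

    NEAbsLayer abs_layer; // alias for NEElementwiseUnaryLayer<ElementWiseUnary::ABS>
    abs_layer.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    abs_layer.run();
}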
diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h
deleted file mode 100644
index e9d58f3e0c..0000000000
--- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H
-#define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H
-
-#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
-#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
-#include "arm_compute/runtime/Distribution1D.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/Lut.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Basic function to execute histogram equalization. This function calls the following NEON kernels:
- *
- * -# @ref NEHistogramKernel
- * -# @ref NECumulativeDistributionKernel
- * -# @ref NETableLookupKernel
- *
- */
-class NEEqualizeHistogram : public IFunction
-{
-public:
- /** Default Constructor. */
- NEEqualizeHistogram();
- /** Initialise the kernel's inputs.
- *
- * @note Currently the width of the input image must be a multiple of 16.
- *
- * @param[in] input Input image. Data type supported: U8.
- * @param[out] output Output image. Data type supported: same as @p input
- */
- void configure(const IImage *input, IImage *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */
- NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution
- and creates the relevant LookupTable. */
- NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
- Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */
- Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */
- Lut _cd_lut; /**< Holds the equalization lookuptable. */
- static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */
- static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */
-};
-}
-#endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */
diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h
deleted file mode 100644
index 1d6ea42e16..0000000000
--- a/arm_compute/runtime/NEON/functions/NEErode.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEERODE_H
-#define ARM_COMPUTE_NEERODE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute erode. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEErodeKernel
- *
- */
-class NEErode : public INESimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output and border mode
- *
- * @param[in, out] input First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEERODE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h
index c710b937b4..99c6fd4eb4 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT1D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,21 +24,22 @@
#ifndef ARM_COMPUTE_NEFFT1D_H
#define ARM_COMPUTE_NEFFT1D_H
-#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
// Forward declaration
class ITensor;
+class NEFFTDigitReverseKernel;
+class NEFFTRadixStageKernel;
+class NEFFTScaleKernel;
-/** Basic function to execute one dimensional FFT. This function calls the following NEON kernels:
+/** Basic function to execute one dimensional FFT. This function calls the following kernels:
*
* -# @ref NEFFTDigitReverseKernel Performs digit reverse
* -# @ref NEFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition
@@ -49,8 +50,26 @@ class NEFFT1D : public IFunction
public:
/** Default Constructor */
NEFFT1D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT1D(const NEFFT1D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT1D &operator=(const NEFFT1D &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT1D(NEFFT1D &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT1D &operator=(NEFFT1D &&) = delete;
+ /** Default destructor */
+ ~NEFFT1D();
/** Initialise the function's source and destinations.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* Number of channels supported: 1 (real tensor) or 2 (complex tensor). If @p input is real, @p output must be complex.
@@ -71,15 +90,15 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group;
- NEFFTDigitReverseKernel _digit_reverse_kernel;
- std::vector<NEFFTRadixStageKernel> _fft_kernels;
- NEFFTScaleKernel _scale_kernel;
- Tensor _digit_reversed_input;
- Tensor _digit_reverse_indices;
- unsigned int _num_ffts;
- unsigned int _axis;
- bool _run_scale;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEFFTDigitReverseKernel> _digit_reverse_kernel;
+ std::vector<std::unique_ptr<NEFFTRadixStageKernel>> _fft_kernels;
+ std::unique_ptr<NEFFTScaleKernel> _scale_kernel;
+ Tensor _digit_reversed_input;
+ Tensor _digit_reverse_indices;
+ unsigned int _num_ffts;
+ unsigned int _axis;
+ bool _run_scale;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEFFT1D_H */
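
A sketch of the real-to-complex forward case the documentation describes; the transform length of 128 is an assumption, and FFT1DInfo defaults select axis 0 and the forward direction:

#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void fft1d_sketch()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32)); // real input: 1 channel
    dst.allocator()->init(TensorInfo(TensorShape(128U), 2, DataType::F32)); // complex output: 2 channels

    FFT1DInfo cfg{}; // defaults: axis 0, forward direction
    NEFFT1D fft;
    fft.configure(&src, &dst, cfg);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    fft.run();
}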
diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h
index e25ebb9e80..cefd3df17a 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT2D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFT2D_H
#define ARM_COMPUTE_NEFFT2D_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
#include "arm_compute/runtime/Tensor.h"
@@ -36,7 +35,7 @@ namespace arm_compute
// Forward declaration
class ITensor;
-/** Basic function to execute two dimensional FFT. This function calls the following NEON kernels:
+/** Basic function to execute two dimensional FFT. This function calls the following kernels:
*
* -# @ref NEFFT1D 1D FFT is performed on the first given axis
* -# @ref NEFFT1D 1D FFT is performed on the second given axis
@@ -46,8 +45,26 @@ class NEFFT2D : public IFunction
public:
/** Default Constructor */
NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT2D(const NEFFT2D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT2D &operator=(const NEFFT2D &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT2D(NEFFT2D &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT2D &operator=(NEFFT2D &&) = delete;
+ /** Default destructor */
+ ~NEFFT2D();
/** Initialise the function's source and destinations
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
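
A sketch mirroring the NEFFT1D example, with an assumed 64x64 shape; the 1-channel-real input and 2-channel-complex output convention is carried over from NEFFT1D's documentation rather than stated here:

#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/NEON/functions/NEFFT2D.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void fft2d_sketch()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 2, DataType::F32)); // complex output

    FFT2DInfo cfg{}; // defaults: transform axes 0 and 1, forward direction
    NEFFT2D fft;
    fft.configure(&src, &dst, cfg);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    fft.run();
}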
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
index 23df459e04..84bfe6b02f 100644
--- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEFFT2D.h"
@@ -43,7 +42,7 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Basic function to execute FFT-based convolution on NEON. This function calls the following NEON functions/kernels:
+/** Basic function to execute FFT-based convolution on CPU. This function calls the following functions/kernels:
*
* -# @ref NEPermute Permute input if NHWC (only NCHW is supported).
* -# @ref NEPadLayer Pad input.
@@ -63,46 +62,68 @@ public:
NEFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFFTConvolutionLayer(const NEFFTConvolutionLayer &) = delete;
- /** Default move constructor */
- NEFFTConvolutionLayer(NEFFTConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFTConvolutionLayer(NEFFTConvolutionLayer &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFFTConvolutionLayer &operator=(const NEFFTConvolutionLayer &) = delete;
- /** Default move assignment operator */
- NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = delete;
+ /** Default destructor */
+ ~NEFFTConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend.
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEFFTConvolutionLayer
*
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input
+ * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
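The added enable_fast_math parameter aligns the signature with the other convolution functions; as documented it is ignored on CPU. A configuration sketch, assuming F32 tensors initialised elsewhere and a 9x9 kernel with the mandatory unit strides (the padding values are an illustrative 'same' choice):

    NEFFTConvolutionLayer conv; // a memory manager may be passed to share scratch buffers
    conv.configure(&src, &weights, &biases, &dst,
                   PadStrideInfo(1, 1, 4, 4), // unit strides; pad 4 for a 9x9 kernel
                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                   /* enable_fast_math */ false); // unused by the CPU backend
    conv.run();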
diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h
deleted file mode 100644
index e2decb177b..0000000000
--- a/arm_compute/runtime/NEON/functions/NEFastCorners.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFASTCORNERS_H
-#define ARM_COMPUTE_NEFASTCORNERS_H
-
-#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Basic function to execute fast corners. This function call the following NEON kernels:
- *
- * -# @ref NEFastCornersKernel
- * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true)
- * -# @ref NEFillArrayKernel
- *
- */
-class NEFastCorners : public IFunction
-{
-public:
- /** Constructor */
- NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEFastCornersKernel _fast_corners_kernel;
- NEFillBorderKernel _border_handler;
- NENonMaximaSuppression3x3Kernel _nonmax_kernel;
- NEFillArrayKernel _fill_kernel;
- Image _output;
- Image _suppressed;
- bool _non_max;
-};
-}
-#endif /*ARM_COMPUTE_NEFASTCORNERS_H */
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index f8a15078c3..1829c71fef 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,25 +24,53 @@
#ifndef ARM_COMPUTE_NEFILL_H
#define ARM_COMPUTE_NEFILL_H
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
-/** Basic function to run @ref NEMemsetKernel */
-class NEFill : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuFillKernel */
+class NEFill : public IFunction
{
public:
+ /** Default Constructor */
+ NEFill();
+ /** Default Destructor */
+ ~NEFill();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFill(const NEFill &) = delete;
+ /** Default move constructor */
+ NEFill(NEFill &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFill &operator=(const NEFill &) = delete;
+ /** Default move assignment operator */
+ NEFill &operator=(NEFill &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in,out] tensor Source tensor. Data types supported: All
* @param[in] constant_value Constant value to use to fill tensor.
*/
void configure(ITensor *tensor, PixelValue constant_value);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEFILL_H */
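NEFill now routes through cpu::kernels::CpuFillKernel behind an opaque Impl, with no change to the call pattern. A minimal sketch (shape and constant are illustrative):

    using namespace arm_compute;

    Tensor t;
    t.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    t.allocator()->allocate();

    NEFill fill;
    fill.configure(&t, PixelValue(1.0f)); // PixelValue carries the constant for any supported type
    fill.run();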
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
index 0ae04cbf00..44b1d4a62b 100644
--- a/arm_compute/runtime/NEON/functions/NEFillBorder.h
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,22 +24,33 @@
#ifndef ARM_COMPUTE_NEFILLBORDER_H
#define ARM_COMPUTE_NEFILLBORDER_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
// Forward declaration
class ITensor;
+class NEFillBorderKernel;
/** Basic function to run @ref NEFillBorderKernel */
class NEFillBorder : public IFunction
{
public:
+ NEFillBorder();
/** Initialize the function's source, destination and border_mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note This function fills the borders within the XY-planes.
*
* @param[in, out] input Source tensor. Data type supported: All
@@ -47,13 +58,16 @@ public:
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ void configure(ITensor *input,
+ unsigned int border_width,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value = PixelValue());
// Inherited methods overridden:
void run() override;
private:
- NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEFILLBORDER_H */
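With the kernel member now a unique_ptr (hence the added default constructor), usage is unchanged. A sketch reusing a tensor t initialised as in the NEFill example above:

    NEFillBorder fill_border;
    // Write a one-pixel constant border into each XY-plane
    fill_border.configure(&t, 1U, BorderMode::CONSTANT, PixelValue(0.f));
    fill_border.run();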
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index 7b4801cd1c..3e92143824 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,18 +25,40 @@
#define ARM_COMPUTE_NEFLATTENLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to execute flatten layer kernel. */
-class NEFlattenLayer : public INESimpleFunctionNoBorder
+class NEFlattenLayer : public IFunction
{
public:
+ NEFlattenLayer();
+ /** Destructor */
+ ~NEFlattenLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFlattenLayer(const NEFlattenLayer &) = delete;
+ /** Default move constructor */
+ NEFlattenLayer(NEFlattenLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFlattenLayer &operator=(const NEFlattenLayer &) = delete;
+ /** Default move assignment operator */
+ NEFlattenLayer &operator=(NEFlattenLayer &&);
/** Initialise the kernel's input and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All
* @param[out] output Output tensor with shape [w*h*d, input_batches] where:
* w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
@@ -53,6 +75,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
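Flattening collapses everything below the batch dimension, so the caller prepares an output shaped [w*h*d, batches]. A sketch with assumed sizes:

    Tensor in, out;
    in.allocator()->init(TensorInfo(TensorShape(4U, 4U, 8U, 2U), 1, DataType::F32));    // [w, h, d, batches]
    out.allocator()->init(TensorInfo(TensorShape(4U * 4U * 8U, 2U), 1, DataType::F32)); // [w*h*d, batches]

    NEFlattenLayer flatten;
    flatten.configure(&in, &out); // NEFlattenLayer::validate(in.info(), out.info()) can be checked first

    in.allocator()->allocate();
    out.allocator()->allocate();
    flatten.run();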
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index a11907ba6c..77ac484bab 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,20 +24,44 @@
#ifndef ARM_COMPUTE_NEFLOOR_H
#define ARM_COMPUTE_NEFLOOR_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEFloorKernel */
-class NEFloor : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuFloorKernel */
+class NEFloor : public IFunction
{
public:
+ /** Constructor */
+ NEFloor();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFloor(const NEFloor &) = delete;
+ /** Default move constructor */
+ NEFloor(NEFloor &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFloor &operator=(const NEFloor &) = delete;
+ /** Default move assignment operator */
+ NEFloor &operator=(NEFloor &&);
+ /** Destructor */
+ ~NEFloor();
/** Set the source, destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data type supported: F16/F32.
* @param[out] output Destination tensor. Same as @p input
*/
@@ -50,6 +74,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEFLOOR_H */
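Per the new table, NEFloor is an element-wise F16/F32 pass with identical input and output shapes. A minimal sketch:

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

    NEFloor floor_fn;
    floor_fn.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    floor_fn.run(); // dst[i] = floor(src[i])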
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index b14650c0e9..885f8430cf 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,46 +24,20 @@
#ifndef ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
#define ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
+#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
-/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels:
- *
- * -# @ref NETransposeKernel
- *
- * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
- */
-class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights
- *
- * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
namespace weights_transformations
{
-/** Basic function to manage the reshape weights generated from @ref NEFullyConnectedLayerReshapeWeights */
+/** Basic function to manage the reshape weights generated from @ref NETranspose */
class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights
{
public:
@@ -95,17 +69,17 @@ public:
}
private:
- static constexpr uint32_t _uid = 0x0;
- Tensor _output{};
- NEFullyConnectedLayerReshapeWeights _func{};
+ static constexpr uint32_t _uid = 0x0;
+ Tensor _output{};
+ NETranspose _func{};
};
} // namespace weights_transformations
-/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
- * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
- * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
- * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
+/** Basic function to compute a Fully Connected layer. This function calls the following kernels:
+ * -# @ref cpu::kernels::CpuIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref NETranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true) (called once)
+ * -# @ref NEGEMM or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel or @ref NEGEMMLowpOutputStage (if quantized asymmetric) (if @p biases is not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
@@ -113,77 +87,91 @@ class NEFullyConnectedLayer : public IFunction
{
public:
/** Constructor */
- NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete;
- /** Default move constructor */
- NEFullyConnectedLayer(NEFullyConnectedLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NEFullyConnectedLayer(NEFullyConnectedLayer &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete;
- /** Default move assignment operator */
- NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = delete;
+ /** Default destructor */
+ ~NEFullyConnectedLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional.
- * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
- * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
- * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
- * @param[in] fc_info (Optional) Fully connected layer additional info
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the input's first 3 dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ * @param[in] weights_info (Optional) Stores necessary compute information when weights are already reshaped
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
- FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
+ const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
- * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
- * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
- * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
- * @param[in] fc_info (Optional) Fully connected layer additional info
+ * Similar to @ref NEFullyConnectedLayer::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ /** Static function that queries whether a fixed-format kernel exists for a given problem description
+ *
+ * @param[out] expected_weight_format Format in which the weights should be stored for the fixed-format kernel that was found
+ * @param[in] input Source tensor
+ * @param[in] weights Weights tensor.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor
+ * @param[in] fc_info Fully connected layer additional info
+ * @param[in] weights_info Describes weights shape
+ *
+ * @return a status
+ */
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const FullyConnectedLayerInfo &fc_info,
+ const WeightsInfo &weights_info);
//Inherited methods override
void run() override;
void prepare() override;
private:
- void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
- void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
- void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
-
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEFlattenLayerKernel _flatten_kernel;
- NEConvertFullyConnectedWeights _convert_weights;
- weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed;
- NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
- weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- Tensor _flatten_output;
- Tensor _converted_weights_output;
- Tensor _reshape_weights_output;
- const ITensor *_original_weights;
- bool _are_weights_converted;
- bool _are_weights_reshaped;
- bool _is_fc_after_conv;
- bool _is_quantized_asymmetric;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H */
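With the reshape-weights helper folded into NETranspose and the internals hidden behind Impl, callers interact only with configure()/validate()/run(). A sketch for the F32 row of the table, assuming pre-initialised tensors and weights that still need transposing:

    FullyConnectedLayerInfo fc_info{};
    fc_info.transpose_weights = true; // weights are supplied untransposed

    NEFullyConnectedLayer fc; // memory and weights managers are optional
    fc.configure(&input, &weights, &biases, &output, fc_info); // weights_info left at its default
    fc.run(); // prepare() is triggered on first run if not called explicitly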
diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
index b3b41c5445..f53b3de7f6 100644
--- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
+++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,6 @@
#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
@@ -33,6 +32,7 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
+class NEFuseBatchNormalizationKernel;
/** Basic function to fuse the batch normalization node to a preceding convolution node */
class NEFuseBatchNormalization : public IFunction
@@ -49,9 +49,19 @@ public:
/** Allow instances of this class to be moved */
NEFuseBatchNormalization &operator=(NEFuseBatchNormalization &&) = default;
/** Default destructor */
- ~NEFuseBatchNormalization() = default;
+ ~NEFuseBatchNormalization();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
* @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
* @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
@@ -65,9 +75,16 @@ public:
* @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
* @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to Convolution.
*/
- void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
- const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ void configure(const ITensor *input_weights,
+ const ITensor *bn_mean,
+ const ITensor *bn_var,
+ ITensor *fused_weights,
+ ITensor *fused_bias,
+ const ITensor *input_bias = nullptr,
+ const ITensor *bn_beta = nullptr,
+ const ITensor *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
/** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalization
*
* @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
@@ -85,16 +102,22 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ static Status validate(const ITensorInfo *input_weights,
+ const ITensorInfo *bn_mean,
+ const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights,
+ const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias = nullptr,
+ const ITensorInfo *bn_beta = nullptr,
+ const ITensorInfo *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
// Inherited methods overridden:
void run() override;
private:
- NEFuseBatchNormalizationKernel _fuse_bn_kernel;
+ std::unique_ptr<NEFuseBatchNormalizationKernel> _fuse_bn_kernel;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H */
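Fusion folds the batch-normalization parameters into the preceding convolution's weights and bias, so it typically runs once at preparation time. A sketch, assuming F32 tensors initialised elsewhere:

    NEFuseBatchNormalization fuse_bn;
    fuse_bn.configure(&conv_weights, &bn_mean, &bn_var,
                      &fused_weights, &fused_bias,
                      /* input_bias */ nullptr, &bn_beta, &bn_gamma,
                      /* epsilon */ 0.001f,
                      FuseBatchNormalizationType::CONVOLUTION);
    fuse_bn.run(); // fused_weights/fused_bias then replace the originals in the convolution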
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 8dc6b88bb0..29650a5eca 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,37 +24,18 @@
#ifndef ARM_COMPUTE_NEGEMM_H
#define ARM_COMPUTE_NEGEMM_H
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/function_info/GEMMInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
+
+#include <memory>
namespace arm_compute
{
-/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels:
- *
- * If optimized assembly is available:
- * -# @ref NEGEMMAssemblyDispatch
- * -# @ref NEActivationLayer (if alpha != 1.0)
- * Else:
- * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix)
- * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix)
- * -# @ref NEGEMMMatrixMultiplyKernel
- * In both cases:
- * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once)
- * Else:
- * -# @ref NEArithmeticAdditionKernel (if c != nullptr and is reshaped once and not optimized assembly in place)
+/** Basic function to execute GEMM. This function calls the following kernels:
*
- * -# @ref NEActivationLayer (if activation is specified in GEMMInfo)
+ * -# @ref cpu::CpuGemm
*/
class NEGEMM : public IFunction
{
@@ -69,11 +50,25 @@ public:
NEGEMM &operator=(const NEGEMM &) = delete;
/** Default move assignment operator */
NEGEMM &operator=(NEGEMM &&) = default;
+ /** Default destructor */
+ ~NEGEMM();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------------|:-----------|:---------|:--------------|
+ * |F32 |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |F16 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ *
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
* @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
+ * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around
+ *
* @param[in] a First input tensor (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
@@ -83,49 +78,49 @@ public:
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should happen only for the first run
*/
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(const ITensor *a,
+ const ITensor *b,
+ const ITensor *c,
+ ITensor *d,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMM.
*
- * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
- * @param[out] output Output tensor info. Data type supported: same as @p a
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of matrix C
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run
+ * Similar to @ref NEGEMM::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
+
+ /** Static function that queries whether a fixed-format kernel exists; if one does, it returns in the first argument the format in which
+ * weights are expected to be reshaped, as defined by the WeightFormat class. Apart from the first argument, the remaining arguments are the same
+ * as in @ref NEGEMM::validate(), except that all arguments are required.
+ *
+ * @return a status
+ */
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEGEMMInterleave4x4Kernel _interleave_kernel;
- NEGEMMTranspose1xWKernel _transpose_kernel;
- NEGEMMMatrixMultiplyKernel _mm_kernel;
- NEGEMMAssemblyDispatch _asm_glue;
- NEGEMMMatrixAdditionKernel _ma_kernel;
- NEActivationLayer _alpha_scale_func;
- NEArithmeticAdditionKernel _add_bias_kernel;
- NEActivationLayer _activation_func;
-
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _tmp_d;
- const ITensor *_original_b;
- bool _run_vector_matrix_multiplication;
- bool _run_alpha_scale;
- bool _run_addition;
- bool _run_bias_addition;
- bool _run_activation;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMM_H */
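NEGEMM now forwards to cpu::CpuGemm, but the d = alpha * a * b + beta * c contract is unchanged. A sketch for F32; note that TensorShape is width-first, so an MxK matrix is declared as TensorShape(K, M) (sizes are illustrative):

    // a: 32x64 (MxK), b: 64x16 (KxN), d: 32x16 (MxN)
    Tensor a, b, d;
    a.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(16U, 64U), 1, DataType::F32));
    d.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::F32));

    NEGEMM gemm;
    gemm.configure(&a, &b, nullptr, &d, 1.0f, 0.0f); // no matrix C: plain product

    a.allocator()->allocate();
    b.allocator()->allocate();
    d.allocator()->allocate();
    gemm.run(); // prepare() runs implicitly on first use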
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
deleted file mode 100644
index ae0ae440d8..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H
-
-#include "arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp"
-
-namespace arm_compute
-{
-/** Assembly kernel glue */
-class NEGEMMAssemblyDispatch : public IFunction
-{
-public:
- /** Constructor */
- NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
- /** Prevent instances of this class from being copy constructed */
- NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete;
- /** Prevent instances of this class from being copied */
- NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete;
- NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default;
- NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default;
- ~NEGEMMAssemblyDispatch() = default;
-
- class IFallback
- {
- public:
- virtual void run() = 0;
- virtual void prepare() = 0;
- virtual bool is_configured() const = 0;
- virtual ~IFallback() = default;
- };
-
-private:
- /** Interface for the arm_gemm fallback */
- std::unique_ptr<IFallback> _arm_gemm;
- MemoryGroup _memory_group; /**< Function memory group */
- IWeightsManager *_weights_manager; /**< Pointer to the weights manager */
-public:
- /** If supported create an ACL function else fallback to the arm_gemm function.
- *
- * @param[in] a Input tensor (Matrix A)
- * @param[in] b Input tensor (Matrix B)
- * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations
- * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] gemm_info GEMM meta-data
- */
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const GEMMInfo &gemm_info);
-
- /** Indicates whether or not this function can be used to process the given parameters.
- *
- * @param[in] a Input tensor info (Matrix A)
- * @param[in] b Input tensor info (Matrix B)
- * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations
- * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] gemm_info GEMM meta-data
- *
- * @return a status.
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const GEMMInfo &gemm_info);
- /** Checks if activation is supported by the gemm assembly dispatcher
- *
- * @param[in] activation Activation to check
- *
- * @return True if activation is supported else false
- */
- static bool is_activation_supported(const ActivationLayerInfo &activation);
- /** Was the function successfully configured ?
- *
- * @return True if the function is configured and ready to run
- */
- bool is_configured() const;
- // Inherited methods overridden:
- /** Runs a preparation step, usually for pre-transposing matrix b */
- void prepare() override;
- void run() override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
new file mode 100644
index 0000000000..d1c5a1c9b3
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMCONV2D_H
+#define ARM_COMPUTE_NEGEMMCONV2D_H
+
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+
+/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
+ *
+ * Supports only NHWC data layout
+ *
+ * -# @ref cpu::CpuGemmAssemblyDispatch
+ * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch
+ *
+ * Weights are transformed from OHWI to HWIO format using the following kernels:
+ * -# @ref NEPermute
+ */
+class NEGEMMConv2d : public IFunction
+{
+public:
+ /** Constructor */
+ NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMConv2d(const NEGEMMConv2d &) = delete;
+ /** Default move constructor */
+ NEGEMMConv2d(NEGEMMConv2d &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete;
+ /** Default move assignment operator */
+ NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default;
+ /** Destructor */
+ ~NEGEMMConv2d();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] info Convolution layer descriptor
+ */
+ void
+ configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConv2d
+ *
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const Conv2dInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEGEMMCONV2D_H */
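A minimal NHWC sketch for the new function. Conv2dInfo's public fields (conv_info, act_info, enable_fast_math) come from FunctionDescriptors.h; the tensors are assumed to have been initialised with DataLayout::NHWC:

    Conv2dInfo info{};
    info.conv_info = PadStrideInfo(1, 1, 0, 0); // unit strides, no padding
    info.act_info  = ActivationLayerInfo();     // no fused activation

    NEGEMMConv2d conv2d;
    conv2d.configure(&src, &weights, &biases, &dst, info);
    conv2d.prepare(); // one-off OHWI -> HWIO weight permutation
    conv2d.run();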
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index e7da1006e0..3e84c3e2cf 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,254 +24,191 @@
#ifndef ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Function to reshape the weights. This function calls the following kernel:
- * -# @ref NEWeightsReshapeKernel
- */
-class NEConvolutionLayerReshapeWeights : public IFunction
-{
-public:
- /** Constructor */
- NEConvolutionLayerReshapeWeights();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete;
- /** Default move constructor */
- NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
- /** Default move assignment operator */
- NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16
- * @param[out] output Destination tensor.
- * Data types supported: Same as @p weights, FP32 if @p weights is BFLOAT16
- */
- void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
- *
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16
- * @param[in] output Destination tensor.
- * Data types supported: Same as @p weights FP32 if @p weights is BFLOAT16
- *
- * @return an error status
- */
- static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEWeightsReshapeKernel _weights_reshape_kernel;
-};
-
-namespace weights_transformations
-{
-/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */
-class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
-{
-public:
- void configure(const ITensor *input, const ITensor *biases)
- {
- _bias_bit = (biases != nullptr) ? 1 : 0;
- _func.configure(input, biases, &_output);
- }
-
- void run() override
- {
- _output.allocator()->allocate();
- _func.run();
- _reshape_run = true;
- }
-
- ITensor *get_weights() override
- {
- return &_output;
- }
-
- void release() override
- {
- _output.allocator()->free();
- }
-
- uint32_t uid() override
- {
- return ((0x8) | (_bias_bit << 7));
- }
-
- bool is_reshape_run()
- {
- return _reshape_run;
- }
-
-private:
- Tensor _output{};
- NEConvolutionLayerReshapeWeights _func{};
- int32_t _bias_bit{ 0 };
-};
-} // namespace weights_transformations
-
-/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
+/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
*
- * -# @ref NEIm2ColKernel
- * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
- * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
- * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
- * -# @ref NECol2ImKernel (if NCHW data layout)
+ * -# @ref cpu::CpuGemmConv2d
*
*/
class NEGEMMConvolutionLayer : public IFunction
{
public:
/** Constructor */
- NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
- /** Default move constructor */
- NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete;
- /** Default move assignment operator */
- NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = delete;
+ /** Default destructor */
+ ~NEGEMMConvolutionLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest available
+     *                              implementation, which can introduce a loss of accuracy. Defaults to false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
- * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest available
+     *                              implementation, which can introduce a loss of accuracy. Defaults to false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- /** Configures the appropriate matrix multiply routine
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
+
+ /** Static function to check if there is an optimized version of
+ * GEMM available for the input parameters.
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
- */
- void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
+     * The method is intended to find the optimal memory layout for
+     * the weights tensor when running variable weights execution.
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Output tensor info. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
- * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
+ * The user can query the database of optimised kernels in
+ * arm_gemm by specifying one of the enumerations of
+ * arm_compute::WeightFormat in the weight_format field of the input
+ * parameter weights_info. In case of success, the method
+ * writes the expected format in the output parameter
+     * expected_weight_format. The expected_weight_format can then be
+     * used in the configure method of the class to select the
+     * optimal kernel.
*
- * @return a status
- */
- static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- int gemm_3d_depth = 1, bool skip_im2col = false);
- /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore
+ * Use case one - query for a specific format:
*
- * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth Depth of GEMM 3D
- * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout
+     *     WeightsInfo weights_info(..., arm_compute::WeightFormat::OHWIo4, ...); // Set the value of the input query.
+     *     arm_compute::WeightFormat expected_wf;
+     *     if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ...., weights_info, ...))
+     *     {
+     *       auto conv = std::make_unique<NEGEMMConvolutionLayer>();
+     *       conv->configure(..., weights_info, ...); // uses the same WeightFormat the user wanted originally, OHWIo4.
+     *       conv->run();
+     *     }
*
- * @return a status
+ * Use case two - query for any format that would be optimal for the GEMM to execute:
+ *
+     *     WeightsInfo weights_info(..., arm_compute::WeightFormat::ANY, ...); // Set the value of the input query.
+     *     arm_compute::WeightFormat expected_wf;
+     *     if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ...., weights_info, ...))
+     *     {
+     *       auto conv = std::make_unique<NEGEMMConvolutionLayer>();
+     *       // ... code to convert the layout of the weights tensor to the layout returned by has_opt_impl
+     *       WeightsInfo new_weights_info(..., expected_wf, ...); // Set the value of the WeightFormat returned by has_opt_impl.
+     *       conv->configure(..., new_weights_info, ...);
+     *       conv->run();
+     *     }
+ *
+     * Note that a GEMM configured with a WeightFormat other than
+     * UNSPECIFIED runs in variable weights mode.
+ *
+ * @param[out] expected_weight_format The arm_compute::WeightFormat expected by the kernel.
+ * @param[in] src Source tensor info.
+ * @param[in] weights Weights tensor info.
+ * @param[in] biases Biases tensor info. Shared biases supported.
+ * @param[in] dst Destination tensor info.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info           (Optional) Specifies additional configuration parameters for the weights of the GEMM computation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info               (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported, as well as no activation (i.e. Linear), which is the default.
+     * @param[in]  enable_fast_math       (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest available
+     *                                    implementation, which can introduce a loss of accuracy. Defaults to false
+     *
+     * @return a status
*/
- static Status validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *dst,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEConvolutionLayerReshapeWeights _reshape_weights;
- weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
- NEIm2ColKernel _im2col_kernel;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- NECol2ImKernel _col2im_kernel;
- NEReshapeLayer _reshape_layer;
-
- const ITensor *_original_weights;
-
- Tensor _im2col_output;
- Tensor _weights_reshaped;
- Tensor _gemm_output;
- Tensor _tmp_output;
-
- DataLayout _data_layout;
-
- bool _skip_im2col;
- bool _skip_col2im;
- bool _is_quantized;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H */
+#endif /* ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H */
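A minimal usage sketch of the interface declared above: validate the tensor infos, configure the function, then run it. The NCHW shapes, F32 data type and padding values are illustrative assumptions; filling the tensors with data and error handling are omitted.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical 3x3 convolution, 16 IFM -> 32 OFM, stride 1, pad 1 (same spatial size).
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 32U), 1, DataType::F32));

        const PadStrideInfo conv_info(1, 1, 1, 1);

        // validate() mirrors configure() and returns a status instead of asserting.
        const Status status = NEGEMMConvolutionLayer::validate(src.info(), weights.info(),
                                                               biases.info(), dst.info(), conv_info);
        if (bool(status))
        {
            NEGEMMConvolutionLayer conv;
            conv.configure(&src, &weights, &biases, &dst, conv_info);

            // Backing memory is allocated after configure(), once padding requirements are known.
            src.allocator()->allocate();
            weights.allocator()->allocate();
            biases.allocator()->allocate();
            dst.allocator()->allocate();

            conv.run(); // the first run also triggers prepare(), which reshapes the weights
        }
        return 0;
    }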
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
deleted file mode 100644
index 10d9c378ae..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H
-#define ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute NEGEMMInterleave4x4Kernel. This function calls the following NEON kernel:
- *
- * -# @ref NEGEMMInterleave4x4Kernel
- *
- */
-class NEGEMMInterleave4x4 : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input First input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h
deleted file mode 100644
index a8ce1e511b..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H
-#define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to execute matrix multiply assembly kernels. */
-class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction
-{
-public:
- /** Constructor */
- NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialise the kernel's inputs, output
- *
- * @param[in] a First input tensor (Matrix A). Data type supported: U8, S8.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). Data type supported: same as @p a
- * @param[out] output Output tensor. Data type supported: Data type supported: U32, S32
- */
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<INEKernel> _mm_kernel;
- std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
- std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
- Tensor _tmp_a;
- Tensor _tmp_b;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 11683c5b95..6d07675d3d 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,49 +21,34 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "NEActivationLayer.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/GEMMInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
+/** Function to run GEMM on quantized types.
*
- * -# @ref NEGEMMInterleave4x4Kernel
- * -# @ref NEGEMMTranspose1xWKernel
- * -# @ref NEGEMMLowpMatrixMultiplyKernel
- * -# @ref NEGEMMLowpOffsetContributionKernel
- * -# @ref NEActivationLayer
+ * This function calls the following:
*
- * otherwise if the DOT product instruction is available:
- *
- * -# @ref NEGEMMLowpOffsetContributionKernel
- *
-*/
+ * -# @ref cpu::CpuGemmLowpMatrixMultiplyCore
+ */
class NEGEMMLowpMatrixMultiplyCore : public IFunction
{
public:
/** Constructor */
- NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move constructor */
@@ -72,8 +57,31 @@ public:
NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move assignment operator */
NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QASYMM8 |S32 |S32 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8 |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
+ *
* @note GEMM_LOWP: low precision GEMM kernel
* This kernel performs the following computations:
*
@@ -81,69 +89,36 @@ public:
      * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
* -# Compute the matrix product of the resulting a * b in int32.
*
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
+ * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED/F32 otherwise
*
* @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
* @param[in] b Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[out] output Output tensor. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
+ * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32/F32
+     * @param[out] output    Output tensor. Data type supported: S32/QASYMM8/QASYMM8_SIGNED/F32
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should be executed only for the first run
*/
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(
+ const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
*
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[in] output Output tensor info. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should be executed only for the first run
+ * Similar to @ref NEGEMMLowpMatrixMultiplyCore::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ const GEMMInfo &gemm_info = GEMMInfo());
// Inherited methods overridden
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEGEMMAssemblyDispatch _asm_glue;
- NEGEMMLowpMatrixMultiplyKernel _mm_kernel;
- NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel;
- NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel;
- NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
- NEActivationLayer _activation_func;
- NEConvertQuantizedSignednessKernel _convert_to_signed_asymm;
- NEConvertQuantizedSignednessKernel _convert_from_signed_asymm;
-
- Tensor _vector_sum_col;
- Tensor _vector_sum_row;
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _mm_result_s32;
- Tensor _signed_a;
- Tensor _signed_output;
- const ITensor *_original_b;
- int32_t _a_offset;
- int32_t _b_offset;
-
- bool _run_vector_matrix_multiplication;
- bool _assembly_path;
- bool _fused_assembly_path;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _fuse_output_stage;
- bool _run_activation;
- bool _flip_signedness;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
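For reference, a minimal sketch of driving the function with the default GEMMInfo (output stage NONE), so the destination stays S32 as in the configuration table above. Shapes and quantization parameters are illustrative assumptions; filling the tensors with data is omitted.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical (M=4 x K=8) * (K=8 x N=16) quantized GEMM.
        // ACL shapes are (width, height), i.e. (columns, rows).
        Tensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
        b.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 3)));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::S32));

        NEGEMMLowpMatrixMultiplyCore gemm;
        if (bool(NEGEMMLowpMatrixMultiplyCore::validate(a.info(), b.info(), nullptr, dst.info())))
        {
            gemm.configure(&a, &b, nullptr, &dst); // c == nullptr: no bias addition

            a.allocator()->allocate();
            b.allocator()->allocate();
            dst.allocator()->allocate();

            // Per the note above: a and b are widened to int32, the offsets are applied,
            // and the matrix product is accumulated in int32.
            gemm.run();
        }
        return 0;
    }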
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index cbdc788c0a..0d932bb4af 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,10 @@
#ifndef ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H
#define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
-/** This file contains all available output stages for GEMMLowp on NEON.
+/** This file contains all available output stages for GEMMLowp.
*
* In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyCore),
* and processes it to obtain the final ASYMM8 value.
@@ -37,253 +38,39 @@
namespace arm_compute
{
class ITensor;
-
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8Scale on NEON.
- *
- * NEGEMMLowpQuantizeDownInt32ToUint8Scale depends on 3 parameters: result_offset, result_mult_int, result_shift
- * The final result is:
- *
- * ((input[i][k] + result_offset) * result_mult_int) >> result_shift
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((input[i][k] + bias[k] + result_offset) * result_mult_int) >> result_shift
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] result_offset Offset to be added to each element of the input matrix
- * @param[in] result_mult_int Value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- ARM_COMPUTE_DEPRECATED_REL(20.05)
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
- int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- ARM_COMPUTE_DEPRECATED_REL(20.05)
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
- *
- * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
- *
- * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
- *
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON.
- *
- * NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
- *
- * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
- *
- * The final result is:
+class ITensorInfo;
+/** Basic function to execute GEMMLowpQuantizeDown kernels.
*
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
+ * This function calls the following operators:
*
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON.
- *
- * NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
- *
- * result_fixedpoint_multiplier, result_shift
- *
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
+ * -# @ref cpu::CpuGemmLowpOutputStage
*/
-class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder
+class NEGEMMLowpOutputStage : public IFunction
{
public:
+ /** Constructor */
+ NEGEMMLowpOutputStage();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpOutputStage &operator=(const NEGEMMLowpOutputStage &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpOutputStage(NEGEMMLowpOutputStage &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpOutputStage &operator=(NEGEMMLowpOutputStage &&) = delete;
+ /** Default destructor */
+ ~NEGEMMLowpOutputStage();
/** Initialise the kernel's inputs, output
*
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
- int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- *
- * @param[in] input Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+ * Valid data layouts:
+ * - All
*
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-
-/** Basic function to execute GEMMLowpQuantizeDown kernels on NEON.
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
-*/
-class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:-------------|
+ * |S32 |S32 |QASYMM8 |
+ * |S32 |S32 |QASYMM8_SIGNED|
+ * |S32 |S32 |QSYMM16 |
*
* @param[in] input Input tensor. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
@@ -302,7 +89,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const GEMMLowpOutputStageInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H */
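A minimal sketch of the surviving interface, assuming the GEMMLowpOutputStageInfo fields from arm_compute/core/Types.h and an illustrative multiplier/shift pair; configure() takes the same arguments as validate(), with tensors in place of tensor infos.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Requantize S32 accumulators (e.g. the output of NEGEMMLowpMatrixMultiplyCore) to QASYMM8.
        Tensor acc, bias, dst;
        acc.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::S32));
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::QASYMM8));

        GEMMLowpOutputStageInfo info{};
        info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
        info.gemmlowp_multiplier = 1073741824; // ~0.5 as a Q0.31 fixed-point multiplier (illustrative)
        info.gemmlowp_shift      = 1;          // extra right shift after the fixed-point multiply
        info.gemmlowp_offset     = 10;         // destination zero point
        info.gemmlowp_min_bound  = 0;          // clamp range; [0, 255] spans full QASYMM8
        info.gemmlowp_max_bound  = 255;
        info.output_data_type    = DataType::QASYMM8;

        NEGEMMLowpOutputStage stage;
        if (bool(NEGEMMLowpOutputStage::validate(acc.info(), bias.info(), dst.info(), info)))
        {
            stage.configure(&acc, &bias, &dst, info);
            acc.allocator()->allocate();
            bias.allocator()->allocate();
            dst.allocator()->allocate();
            stage.run();
        }
        return 0;
    }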
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
deleted file mode 100644
index 6a38490ed4..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H
-#define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMTranspose1xWKernel
- *
- */
-class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input First input tensor. Data type supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW
- *
- * @param[in] input First input tensor. Data type supported: All
- * @param[in] output Output tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h
index 7ed45c0f15..9c7ae0134d 100644
--- a/arm_compute/runtime/NEON/functions/NEGather.h
+++ b/arm_compute/runtime/NEON/functions/NEGather.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,6 +32,7 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEGatherKernel */
class NEGather : public INESimpleFunctionNoBorder
@@ -39,19 +40,26 @@ class NEGather : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis])
+     * @param[in]  indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result is unpredictable.
+     * @note The "axis" must be in the range [0, input.rank - 1] when indices is a vector, and must be 1 when indices is a 2D or 3D tensor.
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ *
*/
void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis])
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ * Similar to @ref NEGather::configure()
*
* @return a status
*/
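
A minimal usage sketch of the widened NEGather contract above (illustrative, not part of this patch): 2D indices gathering along axis 1, which the new note requires for 2D/3D indices. The destination info is assumed to be auto-initialised by configure().

    #include "arm_compute/runtime/NEON/functions/NEGather.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor input, indices, output;
        input.allocator()->init(TensorInfo(TensorShape(8U, 6U, 2U), 1, DataType::F32));
        // Index values must lie in [0, input.shape[1]) = [0, 6).
        indices.allocator()->init(TensorInfo(TensorShape(3U, 2U), 1, DataType::U32));

        NEGather gather;
        gather.configure(&input, &indices, &output, 1 /* axis must be 1 for 2D indices */);

        input.allocator()->allocate();
        indices.allocator()->allocate();
        output.allocator()->allocate();
        gather.run();
        return 0;
    }
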
diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h
deleted file mode 100644
index 45736664ea..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN3x3_H
-#define ARM_COMPUTE_NEGAUSSIAN3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussian3x3Kernel
- *
- */
-class NEGaussian3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's input, output and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEGAUSSIAN3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
deleted file mode 100644
index 847530169a..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H
-#define ARM_COMPUTE_NEGAUSSIAN5x5_H
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussian5x5HorKernel
- * -# @ref NEGaussian5x5VertKernel
- *
- */
-class NEGaussian5x5 : public IFunction
-{
-public:
- /** Default constructor
- */
- NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialise the function's input, output and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function memory group */
- NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */
- NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */
- Tensor _tmp; /**< temporary buffer for output of horizontal pass */
- NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */
-};
-}
-#endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */
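
The two removed Gaussian filters shared the same border-aware call pattern. A pre-patch sketch (image size and border modes illustrative):

    #include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h"
    #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst3, dst5;
        const TensorInfo info(TensorShape(640U, 480U), 1, DataType::U8);
        src.allocator()->init(info);
        dst3.allocator()->init(info);
        dst5.allocator()->init(info);

        NEGaussian3x3 gauss3;
        NEGaussian5x5 gauss5; // optionally takes a memory manager for the intermediate buffer
        gauss3.configure(&src, &dst3, BorderMode::REPLICATE);
        gauss5.configure(&src, &dst5, BorderMode::CONSTANT, 0 /* border value */);

        src.allocator()->allocate();
        dst3.allocator()->allocate();
        dst5.allocator()->allocate();
        gauss3.run();
        gauss5.run();
        return 0;
    }
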
diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h
deleted file mode 100644
index a6b21278ba..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMID_H
-#define ARM_COMPUTE_NEGAUSSIANPYRAMID_H
-
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
-#include "arm_compute/runtime/NEON/functions/NEScale.h"
-#include "arm_compute/runtime/Pyramid.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Common interface for all Gaussian pyramid functions */
-class NEGaussianPyramid : public IFunction
-{
-public:
- /** Default constructor */
- NEGaussianPyramid();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramid(const NEGaussianPyramid &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramid(NEGaussianPyramid &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramid &operator=(NEGaussianPyramid &&) = default;
- /** Default destructor */
- virtual ~NEGaussianPyramid() = default;
-
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] pyramid Destination pyramid tensors. Data type supported at each level: U8.
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0;
-
-protected:
- const ITensor *_input;
- IPyramid *_pyramid;
- Pyramid _tmp;
-};
-
-/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussianPyramidHorKernel
- * -# @ref NEGaussianPyramidVertKernel
- *
- */
-class NEGaussianPyramidHalf : public NEGaussianPyramid
-{
-public:
- /** Constructor */
- NEGaussianPyramidHalf();
-
- // Inherited methods overridden:
- void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void run() override;
-
-private:
- std::vector<NEFillBorderKernel> _horizontal_border_handler;
- std::vector<NEFillBorderKernel> _vertical_border_handler;
- std::vector<NEGaussianPyramidHorKernel> _horizontal_reduction;
- std::vector<NEGaussianPyramidVertKernel> _vertical_reduction;
-};
-
-/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussian5x5
- * -# @ref NEScaleKernel
- *
- */
-class NEGaussianPyramidOrb : public NEGaussianPyramid
-{
-public:
- /** Constructor */
- NEGaussianPyramidOrb();
-
- // Inherited methods overridden:
- void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void run() override;
-
-private:
- std::vector<NEGaussian5x5> _gaus5x5;
- std::vector<NEScale> _scale_nearest;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMID_H */
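
A pre-patch sketch of driving the removed half-scale pyramid; the PyramidInfo constructor arguments and the Pyramid::allocate() call reflect the runtime API of that era and should be treated as assumptions:

    #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
    #include "arm_compute/runtime/Pyramid.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src;
        src.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));

        // Four levels, each half the size of the previous one (assumed PyramidInfo signature).
        PyramidInfo pyr_info(4, SCALE_PYRAMID_HALF, TensorShape(640U, 480U), Format::U8);
        Pyramid pyramid;
        pyramid.init(pyr_info);

        NEGaussianPyramidHalf gauss_pyr;
        gauss_pyr.configure(&src, &pyramid, BorderMode::REPLICATE, 0);

        src.allocator()->allocate();
        pyramid.allocate(); // assumed allocation entry point for the level tensors
        gauss_pyr.run();
        return 0;
    }
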
diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
index 7c470fbaf0..0f294fde22 100644
--- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,34 +23,35 @@
*/
#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H
#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H
-#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
+
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
#include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
class ITensor;
+class NEComputeAllAnchorsKernel;
 /** Basic function to generate proposals for an RPN (Region Proposal Network)
*
- * This function calls the following Neon kernels:
- * -# @ref NEComputeAllAnchors
+ * This function calls the following Arm(R) Neon(TM) layers/kernels:
+ * -# @ref NEComputeAllAnchorsKernel
* -# @ref NEPermute x 2
* -# @ref NEReshapeLayer x 2
* -# @ref NEBoundingBoxTransform
* -# @ref NEPadLayerKernel
- * -# @ref NEDequantizationLayerKernel x 2
- * -# @ref NEQuantizationLayerKernel
+ * -# @ref NEDequantizationLayer x 2
+ * -# @ref NEQuantizationLayer
* And the following CPP kernels:
* -# @ref CPPBoxWithNonMaximaSuppressionLimit
*/
@@ -66,9 +67,21 @@ public:
NEGenerateProposalsLayer(const NEGenerateProposalsLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGenerateProposalsLayer &operator=(const NEGenerateProposalsLayer &) = delete;
+ /** Default destructor */
+ ~NEGenerateProposalsLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 |
+ *
* @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors.
* Data types supported: QASYMM8/F16/F32
* @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
@@ -82,7 +95,12 @@ public:
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct.
* @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid.
*/
- void configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
+ void configure(const ITensor *scores,
+ const ITensor *deltas,
+ const ITensor *anchors,
+ ITensor *proposals,
+ ITensor *scores_out,
+ ITensor *num_valid_proposals,
const GenerateProposalsInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer
@@ -99,7 +117,11 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
+ static Status validate(const ITensorInfo *scores,
+ const ITensorInfo *deltas,
+ const ITensorInfo *anchors,
+ const ITensorInfo *proposals,
+ const ITensorInfo *scores_out,
const ITensorInfo *num_valid_proposals,
const GenerateProposalsInfo &info);
@@ -110,17 +132,17 @@ private:
// Memory group manager
MemoryGroup _memory_group;
- // Neon kernels
- NEPermuteKernel _permute_deltas_kernel;
- NEReshapeLayerKernel _flatten_deltas_kernel;
- NEPermuteKernel _permute_scores_kernel;
- NEReshapeLayerKernel _flatten_scores_kernel;
- NEComputeAllAnchorsKernel _compute_anchors_kernel;
- NEBoundingBoxTransformKernel _bounding_box_kernel;
- NEPadLayerKernel _pad_kernel;
- NEDequantizationLayerKernel _dequantize_anchors;
- NEDequantizationLayerKernel _dequantize_deltas;
- NEQuantizationLayerKernel _quantize_all_proposals;
+ // kernels/layers
+ NEPermute _permute_deltas;
+ NEReshapeLayer _flatten_deltas;
+ NEPermute _permute_scores;
+ NEReshapeLayer _flatten_scores;
+ std::unique_ptr<NEComputeAllAnchorsKernel> _compute_anchors;
+ NEBoundingBoxTransform _bounding_box;
+ NEPadLayer _pad;
+ NEDequantizationLayer _dequantize_anchors;
+ NEDequantizationLayer _dequantize_deltas;
+ NEQuantizationLayer _quantize_all_proposals;
// CPP functions
CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
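
A minimal sketch of configuring the reworked layer; the GenerateProposalsInfo constructor arguments shown (image width/height/scale) and the auto-initialisation of the three outputs are assumptions, not something this patch shows:

    #include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        const unsigned int W = 38, H = 25, A = 9; // feature-map size, anchors per location
        Tensor scores, deltas, anchors, proposals, scores_out, num_valid;

        scores.allocator()->init(TensorInfo(TensorShape(W, H, A), 1, DataType::F32));
        deltas.allocator()->init(TensorInfo(TensorShape(W, H, 4 * A), 1, DataType::F32));
        anchors.allocator()->init(TensorInfo(TensorShape(4U, A), 1, DataType::F32));

        NEGenerateProposalsLayer rpn; // assumed default-constructible (memory manager optional)
        rpn.configure(&scores, &deltas, &anchors, &proposals, &scores_out, &num_valid,
                      GenerateProposalsInfo(600.f, 800.f, 1.f)); // assumed: im_width, im_height, im_scale

        for (Tensor *t : {&scores, &deltas, &anchors, &proposals, &scores_out, &num_valid})
        {
            t->allocator()->allocate();
        }
        rpn.run();
        return 0;
    }
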
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h
deleted file mode 100644
index f0f46ce3ee..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H
-#define ARM_COMPUTE_NEHOGDESCRIPTOR_H
-
-#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class IHOG;
-/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels:
- *
- * -# @ref NEHOGGradient
- * -# @ref NEHOGOrientationBinningKernel
- * -# @ref NEHOGBlockNormalizationKernel
- *
- */
-class NEHOGDescriptor : public IFunction
-{
-public:
- /** Default constructor */
- NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialise the function's source, destination, HOG data-object and border mode
- *
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor which stores the HOG descriptor. Data type supported: F32. The number of channels is equal to the number of histogram bins per block
- * @param[in] hog HOG data object which describes the HOG descriptor
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEHOGGradient _gradient;
- NEHOGOrientationBinningKernel _orient_bin;
- NEHOGBlockNormalizationKernel _block_norm;
- Tensor _mag;
- Tensor _phase;
- Tensor _hog_space;
-};
-}
-
-#endif /* ARM_COMPUTE_NEHOGDESCRIPTOR_H */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
deleted file mode 100644
index c0bd3da468..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDETECTOR_H
-#define ARM_COMPUTE_NEHOGDETECTOR_H
-
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel:
- *
- * -# @ref NEHOGDetectorKernel
- *
- */
-class NEHOGDetector : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
- *
- * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is the caller's responsibility to clear it.
- *
- * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32
- * @param[in] hog HOG data-object that describes the HOG descriptor
- * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the block stride stored in hog
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEHOGDETECTOR_H */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h
deleted file mode 100644
index f8c3827049..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGGRADIENT_H
-#define ARM_COMPUTE_NEHOGGRADIENT_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels:
- *
- * -# @ref NEDerivative
- * -# NEMagnitudePhaseKernel
- *
- */
-class NEHOGGradient : public IFunction
-{
-public:
- /** Default constructor */
- NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialise the function's source, destinations, phase type and border mode
- *
- * @param[in, out] input Input tensor. Data type supported: U8.
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16.
- * @param[out] output_phase Output tensor (phase). Data type supported: U8
- * @param[in] phase_type The @ref PhaseType to use
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEDerivative _derivative;
- std::unique_ptr<INEKernel> _mag_phase;
- Tensor _gx;
- Tensor _gy;
-};
-}
-#endif /*ARM_COMPUTE_NEHOGGRADIENT_H */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
deleted file mode 100644
index 3840b9c0ad..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGMULTIDETECTION_H
-#define ARM_COMPUTE_NEHOGMULTIDETECTION_H
-
-#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/IMultiHOG.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels:
- *
- * -# @ref NEHOGGradient
- * -# @ref NEHOGOrientationBinningKernel
- * -# @ref NEHOGBlockNormalizationKernel
- * -# @ref NEHOGDetector
- * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
- *
- * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
- * -# Phase type
- * -# Normalization type
- * -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
- *
- */
-class NEHOGMultiDetection : public IFunction
-{
-public:
- /** Default constructor */
- NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGMultiDetection(const NEHOGMultiDetection &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete;
- /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
- *
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[in] multi_hog Container of multiple HOG data objects. Each HOG data object describes one HOG model to detect.
- * This container should store the HOG data-objects in descending or ascending cell_size width order.
- * This helps determine whether the HOG descriptor computation can be skipped for some HOG data-objects
- * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects
- * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
- * The dimension of this array must be the same of multi_hog->num_models()
- * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not.
- * True if the non-maxima suppression stage has to be computed
- * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
- *
- */
- void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode,
- uint8_t constant_border_value = 0,
- float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEHOGGradient _gradient_kernel;
- std::vector<NEHOGOrientationBinningKernel> _orient_bin_kernel;
- std::vector<NEHOGBlockNormalizationKernel> _block_norm_kernel;
- std::vector<NEHOGDetector> _hog_detect_kernel;
- CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
- std::vector<Tensor> _hog_space;
- std::vector<Tensor> _hog_norm_space;
- IDetectionWindowArray *_detection_windows;
- Tensor _mag;
- Tensor _phase;
- bool _non_maxima_suppression;
- size_t _num_orient_bin_kernel;
- size_t _num_block_norm_kernel;
- size_t _num_hog_detect_kernel;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHOGMULTIDETECTION_H */
diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
deleted file mode 100644
index caf887d492..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHARRISCORNERS_H
-#define ARM_COMPUTE_NEHARRISCORNERS_H
-
-#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions:
- *
- * -# @ref NESobel3x3 (if gradient_size == 3) or<br/>
- * @ref NESobel5x5 (if gradient_size == 5) or<br/>
- * @ref NESobel7x7 (if gradient_size == 7)
- * -# @ref NEFillBorderKernel
- * -# NEHarrisScoreKernel<3> (if block_size == 3) or<br/>
- * NEHarrisScoreKernel<5> (if block_size == 5) or<br/>
- * NEHarrisScoreKernel<7> (if block_size == 7)
- * -# @ref NENonMaximaSuppression3x3
- * -# @ref CPPCornerCandidatesKernel
- * -# @ref CPPSortEuclideanDistanceKernel
- *
- */
-class NEHarrisCorners : public IFunction
-{
-public:
- /** Constructor
- *
- * Initialize _sobel, _harris_score and _corners_list to nullptr.
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] min_dist Radial Euclidean distance for the Euclidean distance stage
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7
- * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(IImage *input, float threshold, float min_dist, float sensitivity,
- int32_t gradient_size, int32_t block_size, KeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Sobel function */
- std::unique_ptr<INEHarrisScoreKernel> _harris_score; /**< Harris score kernel */
- NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
- CPPCornerCandidatesKernel _candidates; /**< Corner candidates kernel */
- CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
- NEFillBorderKernel _border_gx; /**< Border handler before running harris score */
- NEFillBorderKernel _border_gy; /**< Border handler before running harris score */
- Image _gx; /**< Source image - Gx component */
- Image _gy; /**< Source image - Gy component */
- Image _score; /**< Source image - Harris score */
- Image _nonmax; /**< Source image - Non-Maxima suppressed image */
- std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
- int32_t _num_corner_candidates; /**< Number of potential corner candidates */
-};
-}
-#endif /*ARM_COMPUTE_NEHARRISCORNERS_H */
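
A pre-patch sketch of the removed corner detector (array capacity and tuning values illustrative):

    #include "arm_compute/runtime/Array.h"
    #include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor image;
        image.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

        KeyPointArray corners(1000); // maximum number of corners to keep

        NEHarrisCorners harris;
        harris.configure(&image, 0.0001f /* threshold */, 5.f /* min_dist */, 0.04f /* sensitivity */,
                         3 /* gradient_size */, 3 /* block_size */, &corners, BorderMode::REPLICATE);

        image.allocator()->allocate();
        harris.run();
        return 0;
    }
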
diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h
deleted file mode 100644
index e1a5e42de9..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHistogram.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHISTOGRAM_H
-#define ARM_COMPUTE_NEHISTOGRAM_H
-
-#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class IDistribution1D;
-
-/** Basic function to run @ref NEHistogramKernel. */
-class NEHistogram : public IFunction
-{
-public:
- /** Default Constructor. */
- NEHistogram();
- /** Initialise the kernel's inputs.
- *
- * @param[in] input Input image. Data type supported: U8.
- * @param[out] output Output distribution.
- */
- void configure(const IImage *input, IDistribution1D *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEHistogramKernel _histogram_kernel;
- std::vector<uint32_t> _local_hist;
- std::vector<uint32_t> _window_lut;
- size_t _local_hist_size;
- /** 256 possible pixel values as we handle only U8 images */
- static constexpr unsigned int window_lut_default_size = 256;
-};
-}
-#endif /*ARM_COMPUTE_NEHISTOGRAM_H */
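
A pre-patch sketch of the removed function; it assumes the runtime Distribution1D(num_bins, offset, range) container:

    #include "arm_compute/runtime/Distribution1D.h"
    #include "arm_compute/runtime/NEON/functions/NEHistogram.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor image;
        image.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

        Distribution1D hist(256 /* bins */, 0 /* offset */, 256 /* range */); // one bin per U8 value

        NEHistogram histogram;
        histogram.configure(&image, &hist);

        image.allocator()->allocate();
        histogram.run();
        return 0;
    }
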
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
deleted file mode 100644
index cb905a3652..0000000000
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEIM2COL_H
-#define ARM_COMPUTE_NEIM2COL_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NEIm2ColKernel */
-class NEIm2Col : public IFunction
-{
-public:
- /** Default constructor */
- NEIm2Col();
- /** Configure the im2col NEON kernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8 works only for has_bias = false
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8 works only for has_bias = false
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEIm2ColKernel _kernel;
- unsigned int _y_dim;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEIM2COL_H */
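
A pre-patch sketch of lowering an input for a 3x3, stride-1, pad-1 convolution with the removed function (destination info assumed auto-initialised by configure()):

    #include "arm_compute/core/Size2D.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEIm2Col.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        NEIm2Col im2col;
        im2col.configure(&src, &dst, Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1), true /* has_bias */);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        im2col.run();
        return 0;
    }
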
diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
index e128ec5835..0bc57be09e 100644
--- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H
#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H
-#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -37,6 +36,7 @@
namespace arm_compute
{
class ITensor;
+class NEInstanceNormalizationLayerKernel;
 /** Basic function to perform an instance normalization.
*
@@ -48,8 +48,28 @@ class NEInstanceNormalizationLayer : public IFunction
public:
/** Constructor */
NEInstanceNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayer(const NEInstanceNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayer &operator=(const NEInstanceNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEInstanceNormalizationLayer(NEInstanceNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEInstanceNormalizationLayer &operator=(NEInstanceNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NEInstanceNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
* Data types supported: F16/F32. Data layout supported: NHWC, NCHW
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
@@ -69,19 +89,23 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ float gamma = 1.0f,
+ float beta = 0.0f,
+ float epsilon = 1e-12f);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group;
- NEInstanceNormalizationLayerKernel _normalization_kernel;
- bool _is_nchw;
- NEPermute _permute_input;
- NEPermute _permute_output;
- Tensor _permuted_input;
- Tensor _permuted_output;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEInstanceNormalizationLayerKernel> _normalization_kernel;
+ bool _is_nchw;
+ NEPermute _permute_input;
+ NEPermute _permute_output;
+ Tensor _permuted_input;
+ Tensor _permuted_output;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */
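
A minimal usage sketch of the layer as it stands after this patch (shapes illustrative; gamma, beta and epsilon keep their documented defaults):

    #include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // NCHW F32 input: 32x32 spatial, 8 channels, batch of 2.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U, 2U), 1, DataType::F32));

        NEInstanceNormalizationLayer norm;
        norm.configure(&src, &dst); // gamma = 1.0, beta = 0.0, epsilon = 1e-12

        src.allocator()->allocate();
        dst.allocator()->allocate();
        norm.run();
        return 0;
    }
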
diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h
deleted file mode 100644
index 2d7669d3ef..0000000000
--- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEINTEGRALIMAGE_H
-#define ARM_COMPUTE_NEINTEGRALIMAGE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run a @ref NEIntegralImageKernel */
-class NEIntegralImage : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: U32.
- */
- void configure(const ITensor *input, ITensor *output);
-};
-}
-#endif /*ARM_COMPUTE_NEINTEGRALIMAGE_H */
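
A pre-patch sketch of the removed function (U8 in, U32 out, as documented above):

    #include "arm_compute/runtime/NEON/functions/NEIntegralImage.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U32));

        NEIntegralImage integral;
        integral.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        integral.run();
        return 0;
    }
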
diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
index a581600dbb..8502cee5d2 100644
--- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_NEL2NORMALIZELAYER_H
#define ARM_COMPUTE_NEL2NORMALIZELAYER_H
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -36,6 +35,7 @@
namespace arm_compute
{
class ITensor;
+class NEL2NormalizeLayerKernel;
/** Basic function to perform a L2 normalization on a given axis.
*
@@ -48,14 +48,34 @@ class NEL2NormalizeLayer : public IFunction
public:
/** Constructor */
NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEL2NormalizeLayer(const NEL2NormalizeLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEL2NormalizeLayer &operator=(const NEL2NormalizeLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEL2NormalizeLayer(NEL2NormalizeLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEL2NormalizeLayer &operator=(NEL2NormalizeLayer &&) = delete;
+ /** Default destructor */
+ ~NEL2NormalizeLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0)
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
* @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
* @param[in] epsilon (Optional) Lower bound value for the normalization.
*/
- void configure(ITensor *input, ITensor *output, int axis, float epsilon = 1e-12f);
+ void configure(ITensor *input, ITensor *output, int axis, float epsilon = 1e-6f);
/** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayer.
*
@@ -66,16 +86,16 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon = 1e-12f);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon = 1e-6f);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group;
- NEReductionOperation _reduce_func;
- NEL2NormalizeLayerKernel _normalize_kernel;
- Tensor _sumsq;
+ MemoryGroup _memory_group;
+ NEReductionOperation _reduce_func;
+ std::unique_ptr<NEL2NormalizeLayerKernel> _normalize_kernel;
+ Tensor _sumsq;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */
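
A minimal sketch exercising the new 1e-6 default epsilon (shapes illustrative; destination info assumed auto-initialised by configure()):

    #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));

        NEL2NormalizeLayer l2norm;
        l2norm.configure(&src, &dst, 0 /* axis */); // epsilon defaults to 1e-6f after this patch

        src.allocator()->allocate();
        dst.allocator()->allocate();
        l2norm.run();
        return 0;
    }
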
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index e85e87b88e..629c5d10a0 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,19 +24,18 @@
#ifndef ARM_COMPUTE_NELSTMLAYER_H
#define ARM_COMPUTE_NELSTMLAYER_H
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
+#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "arm_compute/runtime/NEON/functions/NETranspose.h"
namespace arm_compute
{
@@ -49,8 +48,27 @@ class NELSTMLayer : public IFunction
public:
/** Default constructor */
NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELSTMLayer(const NELSTMLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELSTMLayer &operator=(const NELSTMLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELSTMLayer(NELSTMLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELSTMLayer &operator=(NELSTMLayer &&) = delete;
+ /** Default destructor */
+ ~NELSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src13 | dst0 - dst3 |
+ * |:------------|:------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
@@ -86,13 +104,26 @@ public:
* @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
* If set to 0.0 then clipping is disabled.
*/
- void configure(const ITensor *input,
- const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- const ITensor *output_state_in, const ITensor *cell_state_in,
- ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output,
- const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+ void configure(const ITensor *input,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ const ITensor *output_state_in,
+ const ITensor *cell_state_in,
+ ITensor *scratch_buffer,
+ ITensor *output_state_out,
+ ITensor *cell_state_out,
+ ITensor *output,
+ const LSTMParams<ITensor> &lstm_params,
+ const ActivationLayerInfo &activation_info,
+ float cell_threshold = 0.f,
+ float projection_threshold = 0.f);
/** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
*
@@ -133,102 +164,115 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
- const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
- const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *scratch_buffer,
+ const ITensorInfo *output_state_out,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output,
+ const LSTMParams<ITensorInfo> &lstm_params,
+ const ActivationLayerInfo &activation_info,
+ float cell_threshold = 0.f,
+ float projection_threshold = 0.f);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEFullyConnectedLayer _fully_connected_input_gate;
- NEArithmeticAddition _accum_input_gate1;
- NEArithmeticSubtractionKernel _subtract_input_gate;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate;
- NEActivationLayerKernel _activation_input_gate;
- NEFullyConnectedLayer _fully_connected_forget_gate;
- NEArithmeticAddition _accum_forget_gate1;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate;
- NEActivationLayerKernel _activation_forget_gate;
- NEFullyConnectedLayer _fully_connected_cell_state;
- NEGEMM _gemm_cell_state1;
- NETransposeKernel _transpose_cell_state;
- NEArithmeticAdditionKernel _accum_cell_state1;
- NEArithmeticAdditionKernel _accum_cell_state2;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1;
- NEActivationLayerKernel _activation_cell_state;
- NEActivationLayerKernel _cell_clip;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2;
- NEFullyConnectedLayer _fully_connected_output;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state1;
- NEArithmeticAddition _accum_output1;
- NEActivationLayerKernel _activation_output;
- NEActivationLayerKernel _activation_output_state;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state2;
- NEFullyConnectedLayer _fully_connected_output_state;
- NEActivationLayerKernel _projection_clip;
- NECopyKernel _copy_cell_state;
- NECopyKernel _copy_output;
- NEConcatenateLayer _concat_scratch_buffer;
- NEConcatenateLayer _concat_inputs_forget_gate;
- NEConcatenateLayer _concat_weights_forget_gate;
- NEConcatenateLayer _concat_weights_input_gate;
- NEConcatenateLayer _concat_weights_output;
- NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff;
- NEArithmeticAdditionKernel _accum_input_gate_bias;
- NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate_coeff;
- NEArithmeticAdditionKernel _accum_forget_gate_bias;
- NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_gate_coeff;
- NEArithmeticAdditionKernel _accum_cell_gate_bias;
- NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
- NEPixelWiseMultiplicationKernel _pixelwise_mul_output_gate_coeff;
- NEArithmeticAdditionKernel _accum_output_gate_bias;
- Tensor _input_gate_out1;
- Tensor _input_gate_out2;
- Tensor _input_gate_out3;
- Tensor _input_gate_out4;
- Tensor _forget_gate_out1;
- Tensor _forget_gate_out2;
- Tensor _forget_gate_out3;
- Tensor _forget_gate_out4;
- Tensor _forget_gate_out5;
- Tensor _forget_gate_out6;
- Tensor _cell_state_out1;
- Tensor _cell_state_out2;
- Tensor _cell_state_out3;
- Tensor _cell_state_out4;
- Tensor _cell_state_out5;
- Tensor _output1;
- Tensor _output2;
- Tensor _output3;
- Tensor _output4;
- Tensor _cell_state_activation;
- Tensor _output_state1;
- Tensor _ones;
- Tensor _input_layer_norm_out1;
- Tensor _input_layer_norm_out2;
- Tensor _forget_layer_norm_out1;
- Tensor _forget_layer_norm_out2;
- Tensor _cell_layer_norm_out1;
- Tensor _cell_layer_norm_out2;
- Tensor _output_layer_norm_out1;
- Tensor _output_layer_norm_out2;
- bool _run_peephole_opt;
- bool _run_cifg_opt;
- bool _perform_cell_clipping;
- bool _has_projection_weights;
- bool _perform_projection_clipping;
- bool _is_prepared;
- bool _is_layer_norm_lstm;
+ MemoryGroup _memory_group;
+ NEFullyConnectedLayer _fully_connected_input_gate;
+ NEArithmeticAddition _accum_input_gate1;
+ NEArithmeticSubtraction _subtract_input_gate;
+ NEPixelWiseMultiplication _pixelwise_mul_input_gate;
+ NEActivationLayer _activation_input_gate;
+ NEFullyConnectedLayer _fully_connected_forget_gate;
+ NEArithmeticAddition _accum_forget_gate1;
+ NEPixelWiseMultiplication _pixelwise_mul_forget_gate;
+ NEActivationLayer _activation_forget_gate;
+ NEFullyConnectedLayer _fully_connected_cell_state;
+ NEGEMM _gemm_cell_state1;
+ NETranspose _transpose_cell_state;
+ NEArithmeticAddition _accum_cell_state1;
+ NEArithmeticAddition _accum_cell_state2;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_state1;
+ NEActivationLayer _activation_cell_state;
+ NEActivationLayer _cell_clip;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_state2;
+ NEFullyConnectedLayer _fully_connected_output;
+ NEPixelWiseMultiplication _pixelwise_mul_output_state1;
+ NEArithmeticAddition _accum_output1;
+ NEActivationLayer _activation_output;
+ NEActivationLayer _activation_output_state;
+ NEPixelWiseMultiplication _pixelwise_mul_output_state2;
+ NEFullyConnectedLayer _fully_connected_output_state;
+ NEActivationLayer _projection_clip;
+ NECopy _copy_cell_state;
+ NECopy _copy_output;
+ NEConcatenateLayer _concat_scratch_buffer;
+ NEConcatenateLayer _concat_inputs_forget_gate;
+ NEConcatenateLayer _concat_weights_forget_gate;
+ NEConcatenateLayer _concat_weights_input_gate;
+ NEConcatenateLayer _concat_weights_output;
+ NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
+ NEPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
+ NEArithmeticAddition _accum_input_gate_bias;
+ NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
+ NEPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
+ NEArithmeticAddition _accum_forget_gate_bias;
+ NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
+ NEArithmeticAddition _accum_cell_gate_bias;
+ NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
+ NEPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
+ NEArithmeticAddition _accum_output_gate_bias;
+ Tensor _input_gate_out1;
+ Tensor _input_gate_out2;
+ Tensor _input_gate_out3;
+ Tensor _input_gate_out4;
+ Tensor _forget_gate_out1;
+ Tensor _forget_gate_out2;
+ Tensor _forget_gate_out3;
+ Tensor _forget_gate_out4;
+ Tensor _forget_gate_out5;
+ Tensor _forget_gate_out6;
+ Tensor _cell_state_out1;
+ Tensor _cell_state_out2;
+ Tensor _cell_state_out3;
+ Tensor _cell_state_out4;
+ Tensor _cell_state_out5;
+ Tensor _output1;
+ Tensor _output2;
+ Tensor _output3;
+ Tensor _output4;
+ Tensor _cell_state_activation;
+ Tensor _output_state1;
+ Tensor _ones;
+ Tensor _input_layer_norm_out1;
+ Tensor _input_layer_norm_out2;
+ Tensor _forget_layer_norm_out1;
+ Tensor _forget_layer_norm_out2;
+ Tensor _cell_layer_norm_out1;
+ Tensor _cell_layer_norm_out2;
+ Tensor _output_layer_norm_out1;
+ Tensor _output_layer_norm_out2;
+ bool _run_peephole_opt;
+ bool _run_cifg_opt;
+ bool _perform_cell_clipping;
+ bool _has_projection_weights;
+ bool _perform_projection_clipping;
+ bool _is_prepared;
+ bool _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NELSTMLAYER_H */
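With one parameter per line, a call site groups the sixteen tensors by gate; a hypothetical sketch (tensor creation, allocation and the LSTMParams/ActivationLayerInfo setup are elided, all names illustrative):

    NELSTMLayer lstm;
    lstm.configure(&input,
                   &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
                   &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
                   &forget_gate_bias, &cell_bias, &output_gate_bias,
                   &output_state_in, &cell_state_in,
                   &scratch_buffer, &output_state_out, &cell_state_out, &output,
                   lstm_params, act_info); // cell/projection thresholds keep their 0.f defaults
    lstm.run();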
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
index 2f3b8fd336..ae951669b3 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
@@ -38,8 +39,6 @@
#include "arm_compute/runtime/NEON/functions/NESlice.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
-
namespace arm_compute
{
// Forward declarations
@@ -47,10 +46,10 @@ class ITensor;
/** Basic function to run @ref NELSTMLayerQuantized
*
- * This function calls the following NEON functions/kernels:
+ * This function calls the following functions/kernels:
*
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
+ * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16
* -# @ref NETranspose Matrix transpose
* -# @ref NEConcatenateLayer Tensor concatenation
* -# @ref NEActivationLayer Activation functions (tanh and logistic)
@@ -67,14 +66,24 @@ public:
NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NELSTMLayerQuantized(const NELSTMLayerQuantized &) = delete;
- /** Default move constructor */
- NELSTMLayerQuantized(NELSTMLayerQuantized &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NELSTMLayerQuantized(NELSTMLayerQuantized &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete;
- /** Default move assignment operator */
- NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = delete;
+ /** Default destructor */
+ ~NELSTMLayerQuantized();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 |
+ * |:-----------|:------------|:-------|:------|:------|:------|
+ * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8|
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
* @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
@@ -94,11 +103,22 @@ public:
* @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input.
*/
void configure(const ITensor *input,
- const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- ITensor *cell_state_in, const ITensor *output_state_in,
- ITensor *cell_state_out, ITensor *output_state_out);
+ const ITensor *input_to_input_weights,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_input_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *input_gate_bias,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ ITensor *cell_state_in,
+ const ITensor *output_state_in,
+ ITensor *cell_state_out,
+ ITensor *output_state_out);
/** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
*
@@ -123,11 +143,22 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
- const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out);
+ const ITensorInfo *input_to_input_weights,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_input_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *input_gate_bias,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output_state_out);
// Inherited methods overridden:
void run() override;
@@ -137,30 +168,30 @@ private:
MemoryGroup _memory_group;
// Functions used
- NEGEMMLowpMatrixMultiplyCore _gemmlowp;
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage;
- NETranspose _transpose_weights;
- NEConcatenateLayer _concat_input_weights;
- NEConcatenateLayer _concat_recurrent_weights;
- NEConcatenateLayer _concat_weights;
- NEConcatenateLayer _concat_inputs;
- NEConcatenateLayer _concat_bias;
- NEActivationLayer _sigmoid_forget_gate;
- NEActivationLayer _sigmoid_input_gate;
- NEActivationLayer _sigmoid_output_gate;
- NEActivationLayer _tanh_modulation_gate;
- NEActivationLayer _tanh_output_state;
- NEArithmeticAddition _add1;
- NEArithmeticAddition _add2;
- NEPixelWiseMultiplication _mul1;
- NEPixelWiseMultiplication _mul2;
- NEPixelWiseMultiplication _mul3;
- NESlice _slice_input_tensor;
- NESlice _slice_forget_tensor;
- NESlice _slice_cell_tensor;
- NESlice _slice_output_tensor;
- NEDequantizationLayer _dequantize;
- NEQuantizationLayer _quantize;
+ NEGEMMLowpMatrixMultiplyCore _gemmlowp;
+ NEGEMMLowpOutputStage _output_stage;
+ NETranspose _transpose_weights;
+ NEConcatenateLayer _concat_input_weights;
+ NEConcatenateLayer _concat_recurrent_weights;
+ NEConcatenateLayer _concat_weights;
+ NEConcatenateLayer _concat_inputs;
+ NEConcatenateLayer _concat_bias;
+ NEActivationLayer _sigmoid_forget_gate;
+ NEActivationLayer _sigmoid_input_gate;
+ NEActivationLayer _sigmoid_output_gate;
+ NEActivationLayer _tanh_modulation_gate;
+ NEActivationLayer _tanh_output_state;
+ NEArithmeticAddition _add1;
+ NEArithmeticAddition _add2;
+ NEPixelWiseMultiplication _mul1;
+ NEPixelWiseMultiplication _mul2;
+ NEPixelWiseMultiplication _mul3;
+ NESlice _slice_input_tensor;
+ NESlice _slice_forget_tensor;
+ NESlice _slice_cell_tensor;
+ NESlice _slice_output_tensor;
+ NEDequantizationLayer _dequantize;
+ NEQuantizationLayer _quantize;
// Tensor pointers
const ITensor *_input_to_input_weights;
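The quantized variant follows the same pattern; an illustrative sketch assuming QASYMM8 input/weight tensors, S32 biases and a QSYMM16 cell state, as per the table above (all names hypothetical):

    NELSTMLayerQuantized qlstm;
    qlstm.configure(&input,
                    &input_to_input_w, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
                    &recurrent_to_input_w, &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
                    &input_gate_bias, &forget_gate_bias, &cell_bias, &output_gate_bias,
                    &cell_state_in, &output_state_in,
                    &cell_state_out, &output_state_out);
    qlstm.run();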
diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h
deleted file mode 100644
index 5389f67bad..0000000000
--- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELAPLACIANPYRAMID_H
-#define ARM_COMPUTE_NELAPLACIANPYRAMID_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
-#include "arm_compute/runtime/Pyramid.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute laplacian pyramid. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEGaussianPyramidHalf
- * -# @ref NEGaussian5x5
- * -# @ref NEArithmeticSubtraction
- *
- * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and then the
- * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid.
- * L(i) = I(i) - Gaussian5x5(I(i))
- * Level 0 always has the same first two dimensions as the input tensor.
-*/
-class NELaplacianPyramid : public IFunction
-{
-public:
- /** Constructor */
- NELaplacianPyramid();
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: S16.
- * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16.
- * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
- * out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1)
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- size_t _num_levels;
- NEGaussianPyramidHalf _gaussian_pyr_function;
- std::vector<NEGaussian5x5> _convf;
- std::vector<NEArithmeticSubtraction> _subf;
- Pyramid _gauss_pyr;
- Pyramid _conv_pyr;
- NEDepthConvertLayer _depth_function;
-};
-}
-#endif /*ARM_COMPUTE_NELAPLACIANPYRAMID_H */
diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h
deleted file mode 100644
index f939725d51..0000000000
--- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H
-#define ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEScale.h"
-#include "arm_compute/runtime/Pyramid.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEArithmeticAddition
- * -# @ref NEScale
- * -# @ref NEDepthConvertLayer
- *
- * This function reconstructs the original image from a Laplacian Image Pyramid.
- *
- * The input image is added to the last level of the Laplacian pyramid L(n-2), the resulting image is upsampled to the
- * resolution of the next pyramid level.
- *
- * I(n-2) = upsample(input + L(n-1))
- *
- * For each pyramid level i, except i=0 and i=n-1:
- * I(i-1) = upsample(I(i) + L(i))
- *
- * output = I(0) + L(0)
-*/
-class NELaplacianReconstruct : public IFunction
-{
-public:
- /** Constructor */
- NELaplacianReconstruct();
- /** Initialise the function's source, destinations and border mode.
- *
- * The Output image must have the same size as the first level of the pyramid.
- * The Input image must have the same size as the last level of the pyramid.
- *
- * The idea is to reconstruct the original hi-res image from a low-res representation of it and the Laplacian pyramid.
- *
- * @param[in] pyramid Laplacian pyramid tensors, Data type supported at each level: S16.
- * @param[in] input Source tensor. Data type supported: S16.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const IPyramid *pyramid, ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- Pyramid _tmp_pyr;
- std::vector<NEArithmeticAddition> _addf;
- std::vector<NEScale> _scalef;
- NEDepthConvertLayer _depthf;
-};
-}
-#endif /*ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H */
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
deleted file mode 100644
index b2f2b88fce..0000000000
--- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H
-#define ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class INETensor;
-
-/** Basic function to compute the locally connected layer. This function calls the following NEON kernels:
- *
- * -# @ref NEWeightsReshapeKernel (executed only once for each configuration)
- * -# @ref NEIm2ColKernel
- * -# @ref NELocallyConnectedMatrixMultiplyKernel
- * -# @ref NECol2ImKernel
- */
-class NELocallyConnectedLayer : public IFunction
-{
-public:
- /** Default constructor */
- NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedLayer(const NELocallyConnectedLayer &) = delete;
- /** Default move constructor */
- NELocallyConnectedLayer(NELocallyConnectedLayer &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete;
- /** Default move assignment operator */
- NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16, F32.
- * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedLayer
- *
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16, F32.
- * @param[in] weights Weights tensor info. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input.
- * @param[in] output Output tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEIm2ColKernel _input_im2col_kernel;
- NEWeightsReshapeKernel _weights_reshape_kernel;
- NELocallyConnectedMatrixMultiplyKernel _mm_kernel;
- NECol2ImKernel _output_col2im_kernel;
- Tensor _input_im2col_reshaped;
- Tensor _weights_reshaped;
- Tensor _gemm_output;
- bool _is_prepared;
- const ITensor *_original_weights;
-};
-}
-#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NELogical.h b/arm_compute/runtime/NEON/functions/NELogical.h
new file mode 100644
index 0000000000..0ad23200c6
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NELogical.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NELOGICAL_H
+#define ARM_COMPUTE_NELOGICAL_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+
+/** Basic function to perform logical AND */
+class NELogicalAnd : public IFunction
+{
+public:
+ /** Constructor */
+ NELogicalAnd();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalAnd(const NELogicalAnd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalAnd(NELogicalAnd &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalAnd &operator=(const NELogicalAnd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalAnd &operator=(NELogicalAnd &&) = delete;
+ /** Destructor */
+ ~NELogicalAnd();
+
+ /** Initialise the kernel's inputs and output
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
+ * @param[in] input1 First tensor input. Data type supported: U8.
+ * @param[in] input2 Second tensor input. Data type supported: U8.
+ * @param[out] output Output tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NELogicalAnd
+ *
+ * @param[in] input1 First input tensor info. Data types supported: U8.
+ * @param[in] input2 Second input tensor info. Data types supported: U8.
+ * @param[in] output Output tensor info. Data type supported: U8
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+
+/** Basic function to perform logical OR */
+class NELogicalOr : public IFunction
+{
+public:
+ /** Constructor */
+ NELogicalOr();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalOr(const NELogicalOr &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalOr(NELogicalOr &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalOr &operator=(const NELogicalOr &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalOr &operator=(NELogicalOr &&) = delete;
+ /** Destructor */
+ ~NELogicalOr();
+
+ /** Initialise the kernel's inputs and output
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
+ * @param[in] input1 First tensor input. Data type supported: U8.
+ * @param[in] input2 Second tensor input. Data type supported: U8.
+ * @param[out] output Output tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NELogicalOr
+ *
+ * @param[in] input1 First input tensor info. Data types supported: U8.
+ * @param[in] input2 Second input tensor info. Data types supported: U8.
+ * @param[in] output Output tensor info. Data type supported: U8
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+
+/** Basic function to perform logical NOT */
+class NELogicalNot : public IFunction
+{
+public:
+ /** Constructor */
+ NELogicalNot();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalNot(const NELogicalNot &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalNot(NELogicalNot &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalNot &operator=(const NELogicalNot &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalNot &operator=(NELogicalNot &&) = delete;
+ /** Destructor */
+ ~NELogicalNot();
+
+ /** Initialise the kernel's inputs and output
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-------------|
+ * |U8 |U8 |
+ *
+ * @param[in] input Input tensor. Data type supported: U8.
+ * @param[out] output Output tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NELogicalNot
+ *
+ * @param[in] input Input tensor info. Data types supported: U8.
+ * @param[in] output Output tensor info. Data type supported: U8
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NELOGICAL_H */
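Usage is uniform across the three classes; a minimal sketch with hypothetical U8 tensors a, b and out (already initialised and allocated):

    NELogicalAnd logical_and;
    logical_and.configure(&a, &b, &out);
    logical_and.run();

    NELogicalNot logical_not;
    logical_not.configure(&a, &out);
    logical_not.run();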
diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h
deleted file mode 100644
index 168500050e..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMagnitude.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMAGNITUDE_H
-#define ARM_COMPUTE_NEMAGNITUDE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEMagnitudePhaseKernel */
-class NEMagnitude : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs.
- *
- * @param[in] input1 First tensor input. Data type supported: S16.
- * @param[in] input2 Second tensor input. Data type supported: S16.
- * @param[out] output Output tensor. Data type supported: S16.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMAGNITUDE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h
new file mode 100644
index 0000000000..58dd7a6f6b
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMatMul.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Settings for MatMul CPU implementation */
+class CpuMatMulSettings
+{
+public:
+ // get fast math flag
+ bool fast_math() const
+ {
+ return _fast_math;
+ }
+ // get fixed format flag
+ bool fixed_format() const
+ {
+ return _fixed_format;
+ }
+ // Set fast math flag
+ CpuMatMulSettings &fast_math(bool fmath)
+ {
+ _fast_math = fmath;
+ return *this;
+ }
+ // Set fixed format flag
+ CpuMatMulSettings &fixed_format(bool fixed_format)
+ {
+ _fixed_format = fixed_format;
+ return *this;
+ }
+
+private:
+ bool _fast_math{false};
+ bool _fixed_format{false};
+};
+
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+class MatMulInfo;
+class Status;
+
+/** Basic function to run the following operators:
+ *
+ * -# @ref cpu::CpuMatMul
+ */
+class NEMatMul : public IFunction
+{
+public:
+ /** Constructor */
+ NEMatMul();
+ /** Destructor */
+ ~NEMatMul();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMatMul(const NEMatMul &) = delete;
+ /** Default move constructor */
+ NEMatMul(NEMatMul &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMatMul &operator=(const NEMatMul &) = delete;
+ /** Default move assignment operator */
+ NEMatMul &operator=(NEMatMul &&) = default;
+ /** Initialize the function's tensors.
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * Valid data type configurations:
+ * |lhs |rhs |dst |
+ * |:--------------|:------------------|:--------------|
+ * |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ *
+ * @param[in] lhs Left-hand side tensor. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+ * @param[in] rhs Right-hand side tensor. Data types supported: same as @p lhs.
+ * @param[out] dst Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+ * @param[in] info Contains MatMul operation information described in @ref MatMulInfo.
+ * @param[in] settings Contains flags for function-level settings, i.e. fast math
+ * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
+ */
+ void configure(ITensor *lhs,
+ ITensor *rhs,
+ ITensor *dst,
+ const MatMulInfo &info,
+ const CpuMatMulSettings &settings,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEMatMul
+ *
+ * @param[in] lhs Left-hand side tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+ * @param[in] rhs Right-hand side tensor info. Data types supported: same as @p lhs.
+ * @param[in] dst Output tensor info to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+ * @param[in] info Contains MatMul operation information described in @ref MatMulInfo.
+ * @param[in] settings Contains flags for function-level settings, i.e. fast math
+ * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
+ *
+ * @return Status
+ */
+ static Status validate(const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const ITensorInfo *dst,
+ const MatMulInfo &info,
+ const CpuMatMulSettings &settings,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
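A rough call-site sketch pairing the settings object with a default MatMulInfo (lhs, rhs and dst are hypothetical, already-initialised tensors):

    CpuMatMulSettings settings = CpuMatMulSettings().fast_math(true);
    MatMulInfo        mm_info; // defaults: neither operand transposed
    NEMatMul          matmul;

    Status st = NEMatMul::validate(lhs.info(), rhs.info(), dst.info(), mm_info, settings);
    if(st.error_code() == ErrorCode::OK)
    {
        matmul.configure(&lhs, &rhs, &dst, mm_info, settings);
        matmul.run();
    }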
diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
new file mode 100644
index 0000000000..e00fc4544f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020-2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H
+#define ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+class NEFill;
+
+/** Function to perform MaxUnpooling. This function calls the following functions:
+ *
+ * -# @ref NEFill
+ */
+class NEMaxUnpoolingLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NEMaxUnpoolingLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMaxUnpoolingLayer(const NEMaxUnpoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMaxUnpoolingLayer &operator=(const NEMaxUnpoolingLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMaxUnpoolingLayer(NEMaxUnpoolingLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMaxUnpoolingLayer &operator=(NEMaxUnpoolingLayer &&) = delete;
+ /** Default destructor */
+ ~NEMaxUnpoolingLayer();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
+ * @note Only pool size 2 is supported
+ *
+ * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] indices The indices of the maximal values. Data type supported: U32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ */
+ void configure(ITensor *input, ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayer
+ *
+ * @note Only pool size 2 is supported
+ *
+ * @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] indices The indices of the maximal values. Data type supported: U32.
+ * @param[in] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *indices,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::unique_ptr<NEFill> _fill_func;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */
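An illustrative pairing with a preceding max-pooling stage: pool_info is assumed to match the pooling configuration that produced indices (pool size 2, per the note above), and pooled/indices/unpooled are hypothetical, already-initialised tensors:

    PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo());
    NEMaxUnpoolingLayer unpool;
    unpool.configure(&pooled, &indices, &unpooled, pool_info);
    unpool.run();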
diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
deleted file mode 100644
index 954b2228dd..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEANSTDDEV_H
-#define ARM_COMPUTE_NEMEANSTDDEV_H
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-/** Basic function to compute the mean and standard deviation. This function calls the following NEON kernels:
- *
- * @ref NEMeanStdDevKernel
- *
- */
-class NEMeanStdDev : public IFunction
-{
-public:
- /** Default Constructor. */
- NEMeanStdDev();
- /** Initialise the kernel's inputs and outputs.
- *
- * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling)
- * @param[out] mean Output average pixel value.
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- */
- void configure(IImage *input, float *mean, float *stddev = nullptr);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */
- NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */
- uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
-};
-}
-#endif /*ARM_COMPUTE_NEMEANSTDDEV_H */
diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
index 3ce2b2792b..41aa81946b 100644
--- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,13 +30,36 @@
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to execute mean and standard deviation normalization by calling @ref NEMeanStdDevNormalizationKernel */
class NEMeanStdDevNormalizationLayer : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEMeanStdDevNormalizationLayer() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevNormalizationLayer(const NEMeanStdDevNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevNormalizationLayer &operator=(const NEMeanStdDevNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDevNormalizationLayer(NEMeanStdDevNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDevNormalizationLayer &operator=(NEMeanStdDevNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NEMeanStdDevNormalizationLayer();
/** Initialise the function's input and outputs.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr, the normalization will be performed in-place.
*
* @param[in, out] input Input tensor with 2 dimensions. Data types supported: F16/F32.
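A minimal sketch of the in-place path described in the note (input is a hypothetical 2D F16/F32 tensor; omitting the output pointer normalizes in place):

    NEMeanStdDevNormalizationLayer msd_norm;
    msd_norm.configure(&input); // output defaults to nullptr -> in-place
    msd_norm.run();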
diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h
deleted file mode 100644
index 55064f8a8c..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEDIAN3x3_H
-#define ARM_COMPUTE_NEMEDIAN3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute median filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEMedian3x3Kernel
- *
- */
-class NEMedian3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEMEDIAN3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
deleted file mode 100644
index 89b6874320..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMINMAXLOCATION_H
-#define ARM_COMPUTE_NEMINMAXLOCATION_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Basic function to execute min and max location. This function calls the following NEON kernels:
- *
- * -# NEMinMaxKernel
- * -# NEMinMaxLocationKernel
- */
-class NEMinMaxLocation : public IFunction
-{
-public:
- /** Constructor */
- NEMinMaxLocation();
- /** Initialise the kernel's inputs and outputs.
- *
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_loc (Optional) Array of minimum value locations.
- * @param[out] max_loc (Optional) Array of maximum value locations.
- * @param[out] min_count (Optional) Number of minimum value encounters.
- * @param[out] max_count (Optional) Number of maximum value encounters.
- */
- void configure(const IImage *input, void *min, void *max,
- ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr,
- uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEMinMaxKernel _min_max; /**< Kernel that performs min/max */
- NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */
-};
-}
-#endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */
diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h
deleted file mode 100644
index a758e040c6..0000000000
--- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONLINEARFILTER_H
-#define ARM_COMPUTE_NENONLINEARFILTER_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute non linear filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NENonLinearFilterKernel
- *
- * @note Supported mask dimensions: squares of sizes 3 and 5
- */
-class NENonLinearFilter : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode,
- uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NENONLINEARFILTER_H */
diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
deleted file mode 100644
index cb8b202811..0000000000
--- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H
-#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NENonMaximaSuppression3x3Kernel
- *
- */
-class NENonMaximaSuppression3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- *       The constant value used with CONSTANT border mode is 0
- *
- * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input
- * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- *
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode);
-};
-}
-#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H */
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index af34147bfe..27e3fa674e 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,11 @@
#ifndef ARM_COMPUTE_NENORMALIZATIONLAYER_H
#define ARM_COMPUTE_NENORMALIZATIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -39,10 +36,11 @@
namespace arm_compute
{
class ITensor;
+class NENormalizationLayerKernel;
-/** Basic function to compute a normalization layer. This function calls the following NEON kernels:
+/** Basic function to compute a normalization layer. This function calls the following kernels:
*
- * -# @ref NEPixelWiseMultiplicationKernel
+ * -# @ref NEPixelWiseMultiplication
* -# @ref NEFillBorderKernel
* -# @ref NENormalizationLayerKernel
*
@@ -52,8 +50,28 @@ class NENormalizationLayer : public IFunction
public:
/** Default constructor */
NENormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayer(const NENormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayer &operator=(const NENormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NENormalizationLayer(NENormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NENormalizationLayer &operator=(NENormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NENormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
* and an optional 4th dimension for batch of inputs. Data type supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[out] output Destination with the same dimensions, data type, data layout and number of channels of @p input
@@ -69,17 +87,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group; /**< Function memory group */
- NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */
- NEPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel */
- NEFillBorderKernel _border_handler; /**< Kernel to handle borders */
- Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */
+ NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */
+ Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */
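As a reading aid for the refactored interface, a minimal configure/run sketch (the shape and NormalizationLayerInfo parameters are illustrative; the allocate-then-run boilerplate is the library's standard runtime pattern):

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const TensorShape shape(56U, 56U, 32U); // [width, height, IFM]
    Tensor src, dst;
    src.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    NENormalizationLayer norm; // a memory manager may be passed to the constructor
    norm.configure(&src, &dst, NormalizationLayerInfo(NormType::CROSS_MAP, 5));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    norm.run();
    return 0;
}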
diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h
deleted file mode 100644
index 95068aaee0..0000000000
--- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEOPTICALFLOW_H
-#define ARM_COMPUTE_NEOPTICALFLOW_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class Pyramid;
-
-/** Array of LK Internal Keypoints */
-using LKInternalKeypointArray = Array<NELKInternalKeypoint>;
-/** Basic function to execute optical flow. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEScharr3x3
- * -# @ref NELKTrackerKernel
- *
- */
-class NEOpticalFlow : public IFunction
-{
-public:
- /** Constructor
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEOpticalFlow(const NEOpticalFlow &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEOpticalFlow &operator=(const NEOpticalFlow &) = delete;
- /** Initialise the function input and output
- *
- * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported: U8
- * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data type supported: U8
- * @param[in] old_points Pointer to the IKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points
- * @param[out] new_points Pointer to the IKeyPointArray storing new key points
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] border_mode The border mode applied at scharr kernel stage
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT
- *
- */
- void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates,
- IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension,
- bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::vector<NEScharr3x3> _func_scharr;
- std::vector<NELKTrackerKernel> _kernel_tracker;
- std::vector<Tensor> _scharr_gx;
- std::vector<Tensor> _scharr_gy;
- IKeyPointArray *_new_points;
- const IKeyPointArray *_new_points_estimates;
- const IKeyPointArray *_old_points;
- LKInternalKeypointArray _new_points_internal;
- LKInternalKeypointArray _old_points_internal;
- unsigned int _num_levels;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEOPTICALFLOW_H */
diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
index 102a165383..81d5fd162c 100644
--- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,21 +25,47 @@
#define ARM_COMPUTE_NEPRELULAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEArithmeticOperationKernel for PRELU
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for PRELU
*
* @note The function implements an activation layer with the PRELU activation function.
*/
-class NEPReluLayer : public INESimpleFunction
+class NEPReluLayer : public IFunction
{
public:
+ /** Default Constructor */
+ NEPReluLayer();
+ /** Default Destructor */
+ ~NEPReluLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPReluLayer(const NEPReluLayer &) = delete;
+ /** Default move constructor */
+ NEPReluLayer(NEPReluLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPReluLayer &operator=(const NEPReluLayer &) = delete;
+ /** Default move assignment operator */
+ NEPReluLayer &operator=(NEPReluLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] alpha Source alpha tensor. Data types supported: same as @p input.
* @param[out] output Destination tensor. Data type supported: same as @p input
@@ -54,6 +80,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPRELULAYER_H */
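A short usage sketch of the new IFunction-based interface (shapes are illustrative; alpha is shown with the same shape as the input, although the docs above only require the same data type):

#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const TensorShape shape(16U, 16U, 8U);
    Tensor input, alpha, output;
    input.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    alpha.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    output.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    NEPReluLayer prelu;
    prelu.configure(&input, &alpha, &output);

    input.allocator()->allocate();
    alpha.allocator()->allocate();
    output.allocator()->allocate();
    prelu.run();
    return 0;
}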
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index d3074e70bc..494b1c0641 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,24 +24,26 @@
#ifndef ARM_COMPUTE_NEPADLAYER_H
#define ARM_COMPUTE_NEPADLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/runtime/SubTensor.h"
-
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
-/** Basic function to pad a tensor. This function calls the following NEON functions/kernels:
+class NEPadLayerKernel;
+
+/** Basic function to pad a tensor. This function calls the following functions/kernels:
*
* - For padding mode = PaddingMode::CONSTANT:
* -# @ref NEPadLayerKernel
* - Otherwise:
- * -# @ref NECopyKernel
+ * -# @ref NECopy
* -# @ref NEStridedSlice
* -# @ref NEConcatenateLayer
*
@@ -49,10 +51,29 @@ namespace arm_compute
class NEPadLayer : public IFunction
{
public:
- /** Default constructor*/
+ /** Default Constructor */
NEPadLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPadLayer(const NEPadLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPadLayer &operator=(const NEPadLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPadLayer(NEPadLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPadLayer &operator=(NEPadLayer &&) = delete;
+ /** Default destructor */
+ ~NEPadLayer();
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
@@ -61,7 +82,11 @@ public:
* @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
* or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
*/
- void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+ void configure(ITensor *input,
+ ITensor *output,
+ const PaddingList &padding,
+ const PixelValue constant_value = PixelValue(),
+ const PaddingMode mode = PaddingMode::CONSTANT);
/** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer.
*
* @param[in] input Source tensor info. Data types supported: All.
@@ -74,7 +99,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &padding,
+ const PixelValue constant_value = PixelValue(),
+ const PaddingMode mode = PaddingMode::CONSTANT);
// Inherited methods overridden:
void run() override;
@@ -88,7 +117,10 @@ private:
* specifies the front and the end padding in the i-th dimension.
* @param[in] constant_value Constant value to be used for the padding
*/
- void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value);
+ void configure_constant_mode(ITensor *input,
+ ITensor *output,
+ const PaddingList &padding,
+ const PixelValue constant_value);
/** Configure functions for when reflect or symmetric padding is used.
*
* @param[in] input Source tensor. Data types supported: All.
@@ -97,15 +129,15 @@ private:
void configure_reflect_symmetric_mode(ITensor *input, ITensor *output);
private:
- NECopyKernel _copy_kernel;
- NEPadLayerKernel _pad_kernel;
- PaddingMode _mode;
- PaddingList _padding;
- uint32_t _num_dimensions;
- std::vector<NEStridedSlice> _slice_functions;
- std::vector<NEConcatenateLayer> _concat_functions;
- std::vector<Tensor> _slice_results;
- std::vector<Tensor> _concat_results;
+ NECopy _copy_function;
+ std::unique_ptr<NEPadLayerKernel> _pad_kernel;
+ PaddingMode _mode;
+ PaddingList _padding;
+ uint32_t _num_dimensions;
+ std::vector<NEStridedSlice> _slice_functions;
+ std::vector<NEConcatenateLayer> _concat_functions;
+ std::vector<Tensor> _slice_results;
+ std::vector<Tensor> _concat_results;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEPADLAYER_H */
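To make the PaddingList semantics concrete, a minimal CONSTANT-mode sketch (shapes and pad amounts are illustrative; each PaddingList entry is a (before, after) pair for one dimension, so the output shape grows accordingly):

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
    // +1/+1 on width, +2/+2 on height -> 8x8 becomes 10x12.
    const PaddingList padding = {{1, 1}, {2, 2}};
    dst.allocator()->init(TensorInfo(TensorShape(10U, 12U, 3U), 1, DataType::F32));

    NEPadLayer pad;
    pad.configure(&src, &dst, padding, PixelValue(0.f), PaddingMode::CONSTANT);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    pad.run();
    return 0;
}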
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 4651b30e8e..2cef64764d 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,20 +24,43 @@
#ifndef ARM_COMPUTE_NEPERMUTE_H
#define ARM_COMPUTE_NEPERMUTE_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEPermuteKernel */
-class NEPermute : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuPermuteKernel */
+class NEPermute : public IFunction
{
public:
- /** Configure the permute NEON kernel
+ /** Default Constructor */
+ NEPermute();
+ /** Default Destructor */
+ ~NEPermute();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPermute(const NEPermute &) = delete;
+ /** Default move constructor */
+ NEPermute(NEPermute &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPermute &operator=(const NEPermute &) = delete;
+ /** Default move assignment operator */
+ NEPermute &operator=(NEPermute &&) = default;
+ /** Configure the permute function
+ *
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
*
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*
@@ -57,6 +80,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPERMUTE_H */
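A minimal sketch of the permute interface above, using the NCHW-to-NHWC permutation vector (2U, 0U, 1U) that the library documents elsewhere; shapes are illustrative:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32)); // [W, H, C]
    dst.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U), 1, DataType::F32)); // [C, W, H]

    NEPermute permute;
    permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    permute.run();
    return 0;
}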
diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h
deleted file mode 100644
index 220681e9f1..0000000000
--- a/arm_compute/runtime/NEON/functions/NEPhase.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPHASE_H
-#define ARM_COMPUTE_NEPHASE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEMagnitudePhaseKernel */
-class NEPhase : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] input1 First tensor input. Data type supported: S16.
- * @param[in] input2 Second tensor input. Data type supported: S16.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPHASE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 2b31032931..3d81bf6087 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,25 +24,60 @@
#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
+#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEPixelWiseMultiplicationKernel */
-class NEPixelWiseMultiplication : public INESimpleFunction
+/** Basic function to run @ref cpu::CpuMul */
+class NEPixelWiseMultiplication : public IFunction
{
public:
+ /** Default Constructor */
+ NEPixelWiseMultiplication();
+ /** Default Destructor */
+ ~NEPixelWiseMultiplication();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPixelWiseMultiplication(const NEPixelWiseMultiplication &) = delete;
+ /** Default move constructor */
+ NEPixelWiseMultiplication(NEPixelWiseMultiplication &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPixelWiseMultiplication &operator=(const NEPixelWiseMultiplication &) = delete;
+ /** Default move assignment operator */
+ NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&) = default;
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16        |QSYMM16        |QSYMM16        |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |S32 |F32 |
+ *
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
- * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
+ * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
* This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[in, out] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, S32, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
* This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[out] output Output tensor. Data types supported:
* - U8, only if both inputs are U8.
@@ -50,49 +85,80 @@ public:
* - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED.
* - S16.
* - QSYMM16, only if both inputs are QSYMM16.
- * - S32, only if both inputs are QSYMM16.
+ * - S32, only if both inputs are S32 or both are QSYMM16.
* - F16, only if @p input1 is F16.
* - F32, only if both inputs are F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
+ * If @p input1, @p input2 and @p output are all of datatype S32, scale cannot be 1/255
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
* @param[in] rounding_policy Rounding policy.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ float scale,
+ ConvertPolicy overflow_policy,
+ RoundingPolicy rounding_policy,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication
*
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
+ * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, S32, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
* @param[in] output Output tensor info. Data types supported:
* - U8, only if both inputs are U8.
* - QASYMM8, only if both inputs are QASYMM8.
* - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED.
* - S16.
* - QSYMM16, only if both inputs are QSYMM16.
- * - S32, only if both inputs are QSYMM16.
+ * - S32, only if both inputs are S32 or both are QSYMM16.
* - F16, only if @p input1 is F16.
* - F32, only if both inputs are F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
+ * If @p input1, @p input2 and @p output are all of datatype S32, scale cannot be 1/255
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
* @param[in] rounding_policy Rounding policy.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ float scale,
+ ConvertPolicy overflow_policy,
+ RoundingPolicy rounding_policy,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEComplexPixelWiseMultiplicationKernel. */
-class NEComplexPixelWiseMultiplication : public INESimpleFunction
+/** Basic function to run @ref cpu::CpuComplexMul. */
+class NEComplexPixelWiseMultiplication : public IFunction
{
public:
+ /** Default Constructor */
+ NEComplexPixelWiseMultiplication();
+ /** Default Destructor */
+ ~NEComplexPixelWiseMultiplication();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEComplexPixelWiseMultiplication(const NEComplexPixelWiseMultiplication &) = delete;
+ /** Default move constructor */
+ NEComplexPixelWiseMultiplication(NEComplexPixelWiseMultiplication &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEComplexPixelWiseMultiplication &operator=(const NEComplexPixelWiseMultiplication &) = delete;
+ /** Default move assignment operator */
+ NEComplexPixelWiseMultiplication &operator=(NEComplexPixelWiseMultiplication &&) = default;
/** Initialise the kernel's inputs, output.
*
* @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
@@ -102,7 +168,10 @@ public:
* @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplication
*
* @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
@@ -110,7 +179,17 @@ public:
* @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H */
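A minimal sketch tying the scale and policy parameters together (F32 elementwise multiply with unit scale; per the note above, scales other than 1/255 require RoundingPolicy::TO_ZERO):

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const TensorShape shape(64U, 64U);
    Tensor a, b, out;
    a.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    b.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    out.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    NEPixelWiseMultiplication mul;
    // Unit scale keeps the product unscaled; SATURATE clamps on overflow.
    mul.configure(&a, &b, &out, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    mul.run();
    return 0;
}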
diff --git a/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
new file mode 100644
index 0000000000..09251f2a5f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEPOOLING3DLAYER_H
+#define ARM_COMPUTE_NEPOOLING3DLAYER_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+class IMemoryManager;
+/** Basic function to simulate a 3D pooling layer with the specified pooling operation. This function calls the following kernels:
+ *
+ * -# @ref cpu::CpuPool3d
+ */
+class NEPooling3dLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NEPooling3dLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPooling3dLayer(const NEPooling3dLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPooling3dLayer &operator=(const NEPooling3dLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPooling3dLayer(NEPooling3dLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPooling3dLayer &operator=(NEPooling3dLayer &&) = delete;
+ /** Default destructor */
+ ~NEPooling3dLayer();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NDHWC
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ *
+ * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise
+ *
+ * @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor.
+ * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
+ */
+ void configure(const ITensor *input, ITensor *output, const Pooling3dLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPooling3dLayer
+ *
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info.
+ * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Pooling3dLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEPOOLING3DLAYER_H */
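A usage sketch for the new 3D pooling function. The Pooling3dLayerInfo constructor arguments shown (pool type, Size3D pool size/stride, Padding3D) are assumptions modelled on the 2D PoolingLayerInfo; shapes are illustrative and the layout is set to the required NDHWC:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NDHWC: dimensions are [C, W, H, D, N] in the library's reversed ordering.
    TensorInfo src_info(TensorShape(16U, 8U, 8U, 8U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(16U, 4U, 4U, 4U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NDHWC);
    dst_info.set_data_layout(DataLayout::NDHWC);

    Tensor src, dst;
    src.allocator()->init(src_info);
    dst.allocator()->init(dst_info);

    NEPooling3dLayer pool3d;
    // 2x2x2 max pooling with stride 2 and no padding (hypothetical parameter values).
    pool3d.configure(&src, &dst,
                     Pooling3dLayerInfo(PoolingType::MAX, Size3D(2, 2, 2), Size3D(2, 2, 2), Padding3D()));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    pool3d.run();
    return 0;
}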
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index e43741c95b..768ad0d818 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,29 +24,55 @@
#ifndef ARM_COMPUTE_NEPOOLINGLAYER_H
#define ARM_COMPUTE_NEPOOLINGLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels:
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following kernels:
*
- * -# @ref NEFillBorderKernel (executed if padding size is different from zero)
- * -# @ref NEPoolingLayerKernel
+ * -# @ref cpu::CpuPool2d
*/
class NEPoolingLayer : public IFunction
{
public:
/** Constructor */
- NEPoolingLayer();
+ NEPoolingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPoolingLayer(const NEPoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPoolingLayer &operator=(const NEPoolingLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPoolingLayer(NEPoolingLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPoolingLayer &operator=(NEPoolingLayer &&) = delete;
+ /** Default destructor */
+ ~NEPoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note F16 is supported for pool sizes 2 and 3 only
+ * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise
+ *       Cases where the pooling region is completely outside the input tensor are only supported for floating-point data types
*
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
@@ -58,23 +84,24 @@ public:
*
* @note F16 is supported for pool sizes 2 and 3 only
*
- * @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] input Source tensor info. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
+ * @param[in] indices (optional) Tensor info of the indices of the maximal values. Data type supported: U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info,
+ const ITensorInfo *indices = nullptr);
// Inherited methods overridden:
void run() override;
private:
- NEPoolingLayerKernel _pooling_layer_kernel;
- NEFillBorderKernel _border_handler;
- bool _is_global_pooling_layer;
- DataLayout _data_layout;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */
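For comparison with the 3D variant above, a minimal 2D pooling sketch (2x2 max pooling with stride 2; the PoolingLayerInfo constructor taking an explicit DataLayout reflects the library's current API, and shapes are illustrative):

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));

    NEPoolingLayer pool;
    pool.configure(&src, &dst,
                   PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0)));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    pool.run();
    return 0;
}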
diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
index 242460d3a9..858e3299af 100644
--- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,13 +24,13 @@
#ifndef ARM_COMPUTE_NEPRIORBOXLAYER_H
#define ARM_COMPUTE_NEPRIORBOXLAYER_H
-#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEPriorBoxLayerKernel. */
class NEPriorBoxLayer : public INESimpleFunctionNoBorder
@@ -38,6 +38,15 @@ class NEPriorBoxLayer : public INESimpleFunctionNoBorder
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------|:--------|:--------|
+ * |F32 |F32 |F32 |
+ *
* @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
* @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
* @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input
@@ -53,7 +62,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const PriorBoxLayerInfo &info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPRIORBOXLAYER_H */
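A sketch of how the two inputs and the PriorBoxLayerInfo fit together. The PriorBoxLayerInfo constructor arguments (min sizes, variances, offset) follow the usual SSD prior-box parameterization and are assumptions here, as is the single-prior output shape:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor feature_map, image, priors;
    feature_map.allocator()->init(TensorInfo(TensorShape(19U, 19U, 256U), 1, DataType::F32));
    image.allocator()->init(TensorInfo(TensorShape(300U, 300U, 3U), 1, DataType::F32));
    // Output is [W * H * num_priors * 4, 2] as documented above (num_priors assumed 1 here).
    priors.allocator()->init(TensorInfo(TensorShape(19U * 19U * 4U, 2U), 1, DataType::F32));

    NEPriorBoxLayer prior_box;
    prior_box.configure(&feature_map, &image, &priors,
                        PriorBoxLayerInfo({60.f}, {0.1f, 0.1f, 0.2f, 0.2f}, 0.5f));

    feature_map.allocator()->allocate();
    image.allocator()->allocate();
    priors.allocator()->allocate();
    prior_box.run();
    return 0;
}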
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index d1cc962940..009a4e0911 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 ARM Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,37 +24,46 @@
#ifndef ARM_COMPUTE_NEQLSTMLAYER_H
#define ARM_COMPUTE_NEQLSTMLAYER_H
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
-
+class ITensorInfo;
+class NEQLSTMLayerNormalizationKernel;
+namespace cpu
+{
+namespace kernels
+{
+class CpuGemmLowpMatrixAReductionKernel;
+} // namespace kernels
+} // namespace cpu
/** Basic function to run @ref NEQLSTMLayer
*
- * This function calls the following NEON functions/kernels:
+ * This function calls the following kernels:
*
* -# @ref NEActivationLayer Activation functions (tanh and logistic)
- * -# @ref NEArithmeticAdditionKernel Elementwise addition
- * -# @ref NEArithmeticSubtractionKernel Elementwise subtraction
- * -# @ref NECopyKernel Copy kernel for copying output_state_out to output
+ * -# @ref NEArithmeticAddition Elementwise addition
+ * -# @ref NEArithmeticSubtraction Elementwise subtraction
+ * -# @ref NECopy Copy kernel for copying output_state_out to output
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
- * -# @ref NEGEMMLowpMatrixAReductionKernel For precomputing effective biases to use
- * -# @ref NEPixelWiseMultiplicationKernel Elementwise multiplication
+ * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16
+ * -# @ref cpu::kernels::CpuGemmLowpMatrixAReductionKernel For precomputing effective biases to use
+ * -# @ref NEPixelWiseMultiplication Elementwise multiplication
* -# @ref NETranspose Transpose function for reshaping the weights
* */
class NEQLSTMLayer : public IFunction
@@ -64,14 +73,24 @@ public:
NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEQLSTMLayer(const NEQLSTMLayer &) = delete;
- /** Default move constructor */
- NEQLSTMLayer(NEQLSTMLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NEQLSTMLayer(NEQLSTMLayer &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEQLSTMLayer &operator=(const NEQLSTMLayer &) = delete;
- /** Default move assignment operator */
- NEQLSTMLayer &operator=(NEQLSTMLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NEQLSTMLayer &operator=(NEQLSTMLayer &&) = delete;
+ /** Default destructor */
+ ~NEQLSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 - src6 |src7 - src9 |src10 |src11 |dst0 |dst1 - dst2 |
+ * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------|
+ * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
@@ -111,12 +130,21 @@ public:
* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
*/
- void configure(const ITensor *input,
- const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- const ITensor *cell_state_in, const ITensor *output_state_in,
- ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
+ void configure(const ITensor *input,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ const ITensor *cell_state_in,
+ ITensor *output_state_in,
+ ITensor *cell_state_out,
+ ITensor *output_state_out,
+ ITensor *output,
const LSTMParams<ITensor> &lstm_params);
/** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer
@@ -161,12 +189,21 @@ public:
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
* @return a status
*/
- static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
- const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output_state_out,
+ const ITensorInfo *output,
const LSTMParams<ITensorInfo> &lstm_params);
// Inherited methods overridden:
@@ -199,24 +236,33 @@ private:
* @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor.
*
*/
- void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
- const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias, Tensor *mm_res,
- Tensor *outstage_res, float gemmlowp_scale,
- const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
+ void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm,
+ NEGEMMLowpOutputStage &outstage,
+ GEMMLowpOutputStageInfo &gemmlowp_info,
+ const ITensor *mm_input,
+ const ITensor *mm_weights,
+ const ITensor *bias,
+ Tensor *mm_res,
+ Tensor *outstage_res,
+ float gemmlowp_scale,
+ const TensorInfo &mm_res_info,
+ const TensorInfo &outstage_tensor_info);
- MemoryGroup _memory_group{};
+ MemoryGroup _memory_group;
/** A small internal kernel that does the copy between two tensors */
class TensorCopyKernel
{
static constexpr uint32_t max_dimension_supported = 2;
- ITensor *_src{ nullptr };
- ITensor *_dst{ nullptr };
+ ITensor *_src{nullptr};
+ ITensor *_dst{nullptr};
size_t _row_size{};
Window _window{};
public:
+ /** Destructor */
+ ~TensorCopyKernel();
/** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel
*
* @param[in] src Source tensor info.
@@ -236,93 +282,96 @@ private:
};
// Functions used
- NETranspose _transpose_input_to_forget_weights{};
- NETranspose _transpose_input_to_cell_weights{};
- NETranspose _transpose_input_to_output_weights{};
- NETranspose _transpose_input_to_input_weights{};
- NETranspose _transpose_recurrent_to_forget_weights{};
- NETranspose _transpose_recurrent_to_cell_weights{};
- NETranspose _transpose_recurrent_to_output_weights{};
- NETranspose _transpose_recurrent_to_input_weights{};
- NETranspose _transpose_projection_weights{};
- NEGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
- NEGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
- NEGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
- NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
- NEGEMMLowpMatrixAReductionKernel _projection_reduction{};
- NEArithmeticAdditionKernel _projection_bias_add{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
- NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_forget{};
- NEGEMMLowpOutputStage _input_to_forget_outstage{};
- NEGEMMLowpOutputStage _recurrent_to_forget_outstage{};
- NEGEMMLowpOutputStage _cell_to_forget_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_forget{};
- NEArithmeticAdditionKernel _accumulate_cell_forget{};
- NEActivationLayer _forget_gate_sigmoid{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
- NEGEMMLowpOutputStage _input_to_cell_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
- NEGEMMLowpOutputStage _recurrent_to_cell_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_modulation{};
- NEActivationLayer _cell_gate_tanh{};
- NEArithmeticSubtractionKernel _input_gate_sub{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
- NEGEMMLowpOutputStage _input_to_input_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
- NEGEMMLowpOutputStage _recurrent_to_input_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_input{};
- NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{};
- NEGEMMLowpOutputStage _cell_to_input_outstage{};
- NEArithmeticAdditionKernel _accumulate_cell_input{};
- NEActivationLayer _input_gate_sigmoid{};
- NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{};
- NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{};
- NEArithmeticAdditionKernel _add_forget_cell{};
- NEActivationLayer _cell_clip{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
- NEGEMMLowpOutputStage _input_to_output_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
- NEGEMMLowpOutputStage _recurrent_to_output_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_output{};
- NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{};
- NEGEMMLowpOutputStage _cell_to_output_outstage{};
- NEArithmeticAdditionKernel _accumulate_cell_to_output{};
- NEActivationLayer _output_gate_sigmoid{};
- NEActivationLayer _hidden_tanh{};
- NEPixelWiseMultiplicationKernel _pixelwise_mul_hidden{};
- NEGEMMLowpOutputStage _hidden_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_projection{};
- NEGEMMLowpOutputStage _projection_outstage{};
- NEArithmeticAdditionKernel _accumulate_projection{};
- NEActivationLayer _projection_clip{};
- TensorCopyKernel _projection_bias_copy{};
- TensorCopyKernel _projection_output_to_accumulate_copy{};
- TensorCopyKernel _projection_accumulate_to_output_copy{};
- TensorCopyKernel _hidden_to_output_copy{};
+ NEDequantizationLayer _dequantize_input_to_forget_weights;
+ NEQuantizationLayer _quantize_input_to_forget_weights;
+ NETranspose _transpose_input_to_forget_weights;
+ NETranspose _transpose_input_to_cell_weights;
+ NETranspose _transpose_input_to_output_weights;
+ NETranspose _transpose_input_to_input_weights;
+ NETranspose _transpose_recurrent_to_forget_weights;
+ NETranspose _transpose_recurrent_to_cell_weights;
+ NETranspose _transpose_recurrent_to_output_weights;
+ NETranspose _transpose_recurrent_to_input_weights;
+ NETranspose _transpose_projection_weights;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _projection_reduction;
+ NEArithmeticAddition _projection_bias_add;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
+ NEGEMMLowpOutputStage _input_to_forget_outstage;
+ NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
+ NEGEMMLowpOutputStage _cell_to_forget_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_forget;
+ NEArithmeticAddition _accumulate_cell_forget;
+ NEActivationLayer _forget_gate_sigmoid;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
+ NEGEMMLowpOutputStage _input_to_cell_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
+ NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_modulation;
+ NEActivationLayer _cell_gate_tanh;
+ NEArithmeticSubtraction _input_gate_sub;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
+ NEGEMMLowpOutputStage _input_to_input_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
+ NEGEMMLowpOutputStage _recurrent_to_input_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_input;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
+ NEGEMMLowpOutputStage _cell_to_input_outstage;
+ NEArithmeticAddition _accumulate_cell_input;
+ NEActivationLayer _input_gate_sigmoid;
+ NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
+ NEPixelWiseMultiplication _pixelwise_mul_input_cell;
+ NEArithmeticAddition _add_forget_cell;
+ NEActivationLayer _cell_clip;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
+ NEGEMMLowpOutputStage _input_to_output_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
+ NEGEMMLowpOutputStage _recurrent_to_output_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_output;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
+ NEGEMMLowpOutputStage _cell_to_output_outstage;
+ NEArithmeticAddition _accumulate_cell_to_output;
+ NEActivationLayer _output_gate_sigmoid;
+ NEActivationLayer _hidden_tanh;
+ NEPixelWiseMultiplication _pixelwise_mul_hidden;
+ NEGEMMLowpOutputStage _hidden_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_projection;
+ NEGEMMLowpOutputStage _projection_outstage;
+ NEArithmeticAddition _accumulate_projection;
+ NEActivationLayer _projection_clip;
+
+ TensorCopyKernel _projection_bias_copy;
+ TensorCopyKernel _projection_output_to_accumulate_copy;
+ TensorCopyKernel _projection_accumulate_to_output_copy;
+ TensorCopyKernel _hidden_to_output_copy;
- std::array<NEQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
+ std::array<std::unique_ptr<NEQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
- NECopyKernel _copy_output{};
+ NECopy _copy_output;
// Tensor pointers
- const ITensor *_input_to_input_weights{ nullptr };
- const ITensor *_recurrent_to_input_weights{ nullptr };
- const ITensor *_projection_bias{ nullptr };
- const ITensor *_input_to_forget_weights{ nullptr };
- const ITensor *_input_to_cell_weights{ nullptr };
- const ITensor *_input_to_output_weights{ nullptr };
- const ITensor *_recurrent_to_forget_weights{ nullptr };
- const ITensor *_recurrent_to_cell_weights{ nullptr };
- const ITensor *_recurrent_to_output_weights{ nullptr };
- const ITensor *_projection_weights{ nullptr };
- std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{ {} };
- std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{ {} };
+ const ITensor *_input_to_input_weights{nullptr};
+ const ITensor *_recurrent_to_input_weights{nullptr};
+ const ITensor *_projection_bias{nullptr};
+ const ITensor *_input_to_forget_weights{nullptr};
+ const ITensor *_input_to_cell_weights{nullptr};
+ const ITensor *_input_to_output_weights{nullptr};
+ const ITensor *_recurrent_to_forget_weights{nullptr};
+ const ITensor *_recurrent_to_cell_weights{nullptr};
+ const ITensor *_recurrent_to_output_weights{nullptr};
+ const ITensor *_projection_weights{nullptr};
+ std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{};
+ std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{};
using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;
inline LayerNormIndexType getGateIndex(LayerNormGate g)
@@ -350,99 +399,87 @@ private:
return _layer_norm_bias[getGateIndex(g)];
}
- inline NEQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
+ inline std::unique_ptr<NEQLSTMLayerNormalizationKernel> &get_layer_norm(LayerNormGate g)
{
return _layer_norms[getGateIndex(g)];
}
- inline void configure_layer_norm(LayerNormGate g, const ITensor *in)
- {
- ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
-
- Tensor &out = get_layer_norm_output(g);
- _memory_group.manage(&out);
- out.allocator()->init(*(in->info()));
-
- get_layer_norm(g).configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
- }
-
- inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
- {
- // Output quantization scale will be different, but ignored here
- // since it will be configured at configure() stage.
- const TensorInfo out{ in };
- return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
- }
+ void configure_layer_norm(LayerNormGate g, const ITensor *in);
+ static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
- Tensor _input_to_forget_weights_transposed{ nullptr };
- Tensor _input_to_cell_weights_transposed{ nullptr };
- Tensor _input_to_output_weights_transposed{ nullptr };
- Tensor _input_to_input_weights_transposed{ nullptr };
- Tensor _recurrent_to_forget_weights_transposed{ nullptr };
- Tensor _recurrent_to_cell_weights_transposed{ nullptr };
- Tensor _recurrent_to_output_weights_transposed{ nullptr };
- Tensor _recurrent_to_input_weights_transposed{ nullptr };
- Tensor _projection_weights_transposed{ nullptr };
- Tensor _input_to_input_eff_bias{ nullptr };
- Tensor _recurrent_to_input_eff_bias{ nullptr };
- Tensor _input_to_forget_eff_bias{ nullptr };
- Tensor _recurrent_to_forget_eff_bias{ nullptr };
- Tensor _input_to_cell_eff_bias{ nullptr };
- Tensor _recurrent_to_cell_eff_bias{ nullptr };
- Tensor _input_to_output_eff_bias{ nullptr };
- Tensor _recurrent_to_output_eff_bias{ nullptr };
- Tensor _projection_reduction_res{ nullptr };
- Tensor _projection_eff_bias{ nullptr };
- Tensor _mm_input_to_forget_res{ nullptr };
- Tensor _mm_recurrent_to_forget_res{ nullptr };
- Tensor _mul_cell_to_forget_res{ nullptr };
- Tensor _input_to_forget_outstage_res{ nullptr };
- Tensor _cell_to_forget_outstage_res{ nullptr };
- Tensor _recurrent_to_forget_outstage_res{ nullptr };
- Tensor _forget_gate{ nullptr };
- Tensor _mm_input_to_cell_res{ nullptr };
- Tensor _input_to_cell_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_cell_res{ nullptr };
- Tensor _recurrent_to_cell_outstage_res{ nullptr };
- Tensor _cell_gate{ nullptr };
- Tensor _mul_input_cell_res{ nullptr };
- Tensor _mm_input_to_input_res{ nullptr };
- Tensor _input_to_input_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_input_res{ nullptr };
- Tensor _mul_cell_to_input_res{ nullptr };
- Tensor _cell_to_input_outstage_res{ nullptr };
- Tensor _recurrent_to_input_outstage_res{ nullptr };
- Tensor _input_gate{ nullptr };
- Tensor _mm_input_to_output_res{ nullptr };
- Tensor _input_to_output_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_output_res{ nullptr };
- Tensor _mul_cell_to_output_res{ nullptr };
- Tensor _cell_to_output_outstage_res{ nullptr };
- Tensor _recurrent_to_output_outstage_res{ nullptr };
- Tensor _output_gate{ nullptr };
- Tensor _hidden_mul_res{ nullptr };
- Tensor _hidden_gate{ nullptr };
- Tensor _mm_projection_res{ nullptr };
- Tensor _projection_outstage_res{ nullptr };
- Tensor _projection_out_res{ nullptr };
- Tensor _projection_accumulate_res{ nullptr };
- Tensor _ones{ nullptr };
- std::array<Tensor, _layer_norm_count> _layer_norm_output{ {} };
+ Tensor _input_to_forget_weights_f32{nullptr};
+ Tensor _input_to_forget_weights_symm8{nullptr};
+
+ Tensor _input_to_forget_weights_transposed{nullptr};
+ Tensor _input_to_cell_weights_transposed{nullptr};
+ Tensor _input_to_output_weights_transposed{nullptr};
+ Tensor _input_to_input_weights_transposed{nullptr};
+ Tensor _recurrent_to_forget_weights_transposed{nullptr};
+ Tensor _recurrent_to_cell_weights_transposed{nullptr};
+ Tensor _recurrent_to_output_weights_transposed{nullptr};
+ Tensor _recurrent_to_input_weights_transposed{nullptr};
+ Tensor _projection_weights_transposed{nullptr};
+ Tensor _input_to_input_eff_bias{nullptr};
+ Tensor _recurrent_to_input_eff_bias{nullptr};
+ Tensor _input_to_forget_eff_bias{nullptr};
+ Tensor _recurrent_to_forget_eff_bias{nullptr};
+ Tensor _input_to_cell_eff_bias{nullptr};
+ Tensor _recurrent_to_cell_eff_bias{nullptr};
+ Tensor _input_to_output_eff_bias{nullptr};
+ Tensor _recurrent_to_output_eff_bias{nullptr};
+ Tensor _projection_reduction_res{nullptr};
+ Tensor _projection_eff_bias{nullptr};
+ Tensor _mm_input_to_forget_res{nullptr};
+ Tensor _mm_recurrent_to_forget_res{nullptr};
+ Tensor _mul_cell_to_forget_res{nullptr};
+ Tensor _input_to_forget_outstage_res{nullptr};
+ Tensor _cell_to_forget_outstage_res{nullptr};
+ Tensor _recurrent_to_forget_outstage_res{nullptr};
+ Tensor _forget_gate{nullptr};
+ Tensor _mm_input_to_cell_res{nullptr};
+ Tensor _input_to_cell_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_cell_res{nullptr};
+ Tensor _recurrent_to_cell_outstage_res{nullptr};
+ Tensor _cell_gate{nullptr};
+ Tensor _mul_input_cell_res{nullptr};
+ Tensor _mm_input_to_input_res{nullptr};
+ Tensor _input_to_input_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_input_res{nullptr};
+ Tensor _mul_cell_to_input_res{nullptr};
+ Tensor _cell_to_input_outstage_res{nullptr};
+ Tensor _recurrent_to_input_outstage_res{nullptr};
+ Tensor _input_gate{nullptr};
+ Tensor _mm_input_to_output_res{nullptr};
+ Tensor _input_to_output_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_output_res{nullptr};
+ Tensor _mul_cell_to_output_res{nullptr};
+ Tensor _cell_to_output_outstage_res{nullptr};
+ Tensor _recurrent_to_output_outstage_res{nullptr};
+ Tensor _output_gate{nullptr};
+ Tensor _hidden_mul_res{nullptr};
+ Tensor _hidden_gate{nullptr};
+ Tensor _mm_projection_res{nullptr};
+ Tensor _projection_outstage_res{nullptr};
+ Tensor _projection_out_res{nullptr};
+ Tensor _projection_accumulate_res{nullptr};
+ Tensor _ones{nullptr};
+ std::array<Tensor, _layer_norm_count> _layer_norm_output{};
inline Tensor &get_layer_norm_output(LayerNormGate g)
{
return _layer_norm_output[getGateIndex(g)];
}
- bool _is_prepared{ false };
- bool _has_cifg{ false };
- bool _has_cell_clipping{ false };
- bool _has_projection{ false };
- bool _has_projection_clipping{ false };
- bool _has_peephole{ false };
- bool _has_layer_norm{ false };
- bool _projection_tensor_copy_required{ false };
+ bool _is_prepared{false};
+ bool _has_cifg{false};
+ bool _has_cell_clipping{false};
+ bool _has_projection{false};
+ bool _has_projection_clipping{false};
+ bool _has_peephole{false};
+ bool _has_layer_norm{false};
+ bool _projection_tensor_copy_required{false};
+ bool _convert_input_to_forget_weights_to_qsymm8{false};
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEQLSTMLAYER_H */
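To make the widened interface above easier to scan, here is a validate()-level sketch of a call site. The sizes, quantization scales, and the default-constructed LSTMParams (optional input gate, peephole, projection and layer normalization all left disabled) are illustrative assumptions, not part of this patch, and the chosen scales may still be rejected by validate() at runtime:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEQLSTMLayer.h"

    using namespace arm_compute;

    Status check_qlstm()
    {
        const unsigned int input_size = 32, num_units = 16, batch = 1;

        // Hypothetical tensor metadata following the data type table above.
        const TensorInfo input(TensorShape(input_size, batch), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f / 128.f, 0));
        const TensorInfo w_in(TensorShape(input_size, num_units), 1, DataType::QSYMM8, QuantizationInfo(0.005f));
        const TensorInfo w_rec(TensorShape(num_units, num_units), 1, DataType::QSYMM8, QuantizationInfo(0.005f));
        const TensorInfo bias(TensorShape(num_units), 1, DataType::S32);
        const TensorInfo cell(TensorShape(num_units, batch), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f, 0));
        const TensorInfo state(TensorShape(num_units, batch), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f / 128.f, 0));

        const LSTMParams<ITensorInfo> params; // optional gates/paths left disabled

        // The cell/state infos are reused for the in/out pairs purely to keep the sketch short.
        return NEQLSTMLayer::validate(&input, &w_in, &w_in, &w_in,   // input-to-{forget,cell,output} weights
                                      &w_rec, &w_rec, &w_rec,       // recurrent-to-{forget,cell,output} weights
                                      &bias, &bias, &bias,          // gate biases (S32)
                                      &cell, &state, &cell, &state, &state,
                                      params);
    }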
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index fc317be81e..7bf97e28a5 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,30 +24,45 @@
#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYER_H
#define ARM_COMPUTE_NEQUANTIZATIONLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IRuntimeContext.h"
-#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to simulate a quantization layer. This function calls the following NEON kernels:
- *
- *
- * -# @ref NEQuantizationLayerKernel
- *
- */
-class NEQuantizationLayer : public INESimpleFunctionNoBorder
+/** Basic function to run a quantization layer using @ref cpu::CpuQuantize */
+class NEQuantizationLayer : public IFunction
{
public:
- /** Default constructor */
- NEQuantizationLayer() = default;
+ NEQuantizationLayer();
+ /** Default Destructor */
+ ~NEQuantizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationLayer(const NEQuantizationLayer &) = delete;
+ /** Default move constructor */
+ NEQuantizationLayer(NEQuantizationLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationLayer &operator=(const NEQuantizationLayer &) = delete;
+ /** Default move assignment operator */
+ NEQuantizationLayer &operator=(NEQuantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------------------------------|
+ * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ *
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
*/
@@ -60,6 +75,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEQUANTIZATIONLAYER_H */
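For orientation, a minimal configure/run round-trip against the new pimpl-based interface might look as follows; the shapes and the target QuantizationInfo are placeholders chosen for this sketch:

    #include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void quantize_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        // The destination carries the requested quantization (scale/offset chosen arbitrarily here).
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 10)));

        NEQuantizationLayer quantize;
        quantize.configure(&src, &dst); // validate() can be called first on the TensorInfos

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...
        quantize.run();
    }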
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index 0bfb905e19..af7f464ac9 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,11 +24,10 @@
#ifndef ARM_COMPUTE_NERNNLAYER_H
#define ARM_COMPUTE_NERNNLAYER_H
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
@@ -45,14 +44,26 @@ public:
NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NERNNLayer(const NERNNLayer &) = delete;
- /** Default move constructor */
- NERNNLayer(NERNNLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NERNNLayer(NERNNLayer &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NERNNLayer &operator=(const NERNNLayer &) = delete;
- /** Default move assignment operator */
- NERNNLayer &operator=(NERNNLayer &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains pointers) */
+ NERNNLayer &operator=(NERNNLayer &&) = delete;
+ /** Default destructor */
+ ~NERNNLayer();
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |src3 |dst0 |dst1 |
+ * |:------|:------|:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |F32 |F32 |
+ *
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
* @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input
* @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input
@@ -61,7 +72,13 @@ public:
* @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input
* @param[in] info Activation layer parameter.
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights, const ITensor *bias, ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *recurrent_weights,
+ const ITensor *bias,
+ ITensor *hidden_state,
+ ITensor *output,
+ ActivationLayerInfo &info);
/** Initialize the function
*
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
@@ -74,7 +91,12 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state, const ITensorInfo *output,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *recurrent_weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *hidden_state,
+ const ITensorInfo *output,
const ActivationLayerInfo &info);
// Inherited methods overridden:
@@ -82,16 +104,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_state_f;
- NEArithmeticAdditionKernel _add_kernel;
- NEActivationLayerKernel _activation_kernel;
- NEFullyConnectedLayer _fully_connected;
- NECopyKernel _copy_kernel;
- Tensor _fully_connected_out;
- Tensor _gemm_output;
- Tensor _add_output;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ NEGEMM _gemm_state_f;
+ NEArithmeticAddition _add_f;
+ NEActivationLayer _activation;
+ NEFullyConnectedLayer _fully_connected;
+ NECopy _copy_f;
+ Tensor _fully_connected_out;
+ Tensor _gemm_output;
+ Tensor _add_output;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NERNNLAYER_H */
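A validate()-level sketch of the reformatted signature above; the sizes are illustrative and the shared `state` info stands in for both hidden_state and output only to keep the example short:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NERNNLayer.h"

    using namespace arm_compute;

    Status check_rnn()
    {
        const unsigned int input_size = 8, num_units = 4, batch = 2;

        const TensorInfo input(TensorShape(input_size, batch), 1, DataType::F32);
        const TensorInfo weights(TensorShape(input_size, num_units), 1, DataType::F32);
        const TensorInfo recurrent(TensorShape(num_units, num_units), 1, DataType::F32);
        const TensorInfo bias(TensorShape(num_units), 1, DataType::F32);
        const TensorInfo state(TensorShape(num_units, batch), 1, DataType::F32);

        return NERNNLayer::validate(&input, &weights, &recurrent, &bias, &state, &state,
                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH));
    }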
diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
index 04a24ac7ec..b06ebe899d 100644
--- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,27 +24,35 @@
#ifndef ARM_COMPUTE_NEROIALIGNLAYER_H
#define ARM_COMPUTE_NEROIALIGNLAYER_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEROIAlignLayerKernel.
- *
- * This function calls the following NEON kernels:
- * -# @ref NEROIAlignLayerKernel
- *
- */
-class NEROIAlignLayer : public INESimpleFunction
+/** Basic function to run @ref NEROIAlignLayerKernel. */
+class NEROIAlignLayer : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM16 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input
+ * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
*
@@ -56,8 +64,8 @@ public:
void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8,
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED,
* otherwise same as @p input
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
@@ -69,7 +77,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEROIALIGNLAYER_H */
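The rois layout and the quantized-input constraint documented above can be seen in a validate() sketch; the spatial sizes, ROI count and input scale are assumptions, while the QASYMM16 scale of 0.125 with offset 0 follows the requirement stated in the parameter docs:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h"

    using namespace arm_compute;

    Status check_roi_align()
    {
        const unsigned int num_rois = 4;

        const TensorInfo input(TensorShape(16U, 16U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.1f, 128));
        // Each column of rois is { batch_id, x1, y1, x2, y2 }.
        const TensorInfo rois(TensorShape(5U, num_rois), 1, DataType::QASYMM16, QuantizationInfo(0.125f, 0));
        TensorInfo output(TensorShape(7U, 7U, 3U, num_rois), 1, DataType::QASYMM8, QuantizationInfo(0.1f, 128));

        return NEROIAlignLayer::validate(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 0.5f));
    }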
diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
index 887b5712da..929111ad4b 100644
--- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,29 +24,46 @@
#ifndef ARM_COMPUTE_NEROIPOOLINGLAYER_H
#define ARM_COMPUTE_NEROIPOOLINGLAYER_H
+#include "arm_compute/core/IArray.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NEROIPoolingLayerKernel;
+class ROIPoolingLayerInfo;
-/** Basic function to run @ref NEROIPoolingLayerKernel.
- *
- * This function calls the following NEON kernels:
- * -# @ref NEROIPoolingLayerKernel
- *
- */
+/** Basic function to run @ref NEROIPoolingLayerKernel. */
class NEROIPoolingLayer : public IFunction
{
public:
/** Constructor */
NEROIPoolingLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIPoolingLayer(const NEROIPoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIPoolingLayer &operator=(const NEROIPoolingLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEROIPoolingLayer(NEROIPoolingLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEROIPoolingLayer &operator=(NEROIPoolingLayer &&) = delete;
+ /** Default destructor */
+ ~NEROIPoolingLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F32 |U16 |F32 |
+ * |QASYMM8 |U16 |QASYMM8 |
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F32
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
* @param[out] output Destination tensor. Data types supported: Same as @p input.
@@ -57,13 +74,32 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
+ void
+ configure(const ITensor *input, const ITensor *rois, const ITensor *output, const ROIPoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run() override;
+ /** Static function to check if given info will lead to a valid configuration of @ref NEROIPoolingLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F32.
+ * @param[in] rois TensorInfo for rois tensor which is a 2D tensor of size [5,N] (where N is the number of ROIs). Data types supported: U16
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ const ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
+
private:
- NEROIPoolingLayerKernel _roi_kernel;
+ std::unique_ptr<NEROIPoolingLayerKernel> _roi_kernel;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEROIPOOLINGLAYER_H */
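A short sketch of the newly added validate() entry point, using the F32/U16 configuration from the table above; shapes and the ROI count are placeholders:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h"

    using namespace arm_compute;

    Status check_roi_pool()
    {
        const TensorInfo input(TensorShape(16U, 16U, 3U), 1, DataType::F32);
        const TensorInfo rois(TensorShape(5U, 4U), 1, DataType::U16); // 4 ROIs of { batch_id, x1, y1, x2, y2 }
        const TensorInfo output(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32);

        return NEROIPoolingLayer::validate(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 1.f));
    }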
diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h
index 83ca625aa7..609456a4ef 100644
--- a/arm_compute/runtime/NEON/functions/NERange.h
+++ b/arm_compute/runtime/NEON/functions/NERange.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,26 +24,55 @@
#ifndef ARM_COMPUTE_NERANGE_H
#define ARM_COMPUTE_NERANGE_H
-#include "arm_compute/core/NEON/kernels/NERangeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NERangeKernel;
/** Basic function to run @ref NERangeKernel
*
* @note The tensor data type for the output must be U8/S8/U16/S16/U32/S32/F16/F32.
* @note The function generates a sequence with the given start, end and step.
+ *
*/
class NERange : public IFunction
{
public:
/** Default constructor */
NERange();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERange(const NERange &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERange &operator=(const NERange &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NERange(NERange &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NERange &operator=(NERange &&) = delete;
+ /** Default destructor */
+ ~NERange();
/** Initialize the kernel's start, end, step and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |dst |
+ * |:---------|
+ * |U8 |
+ * |S8 |
+ * |U16 |
+ * |S16 |
+ * |U32 |
+ * |S32 |
+ * |F16 |
+ * |F32 |
+ *
* @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] start The starting value of the sequence.
* @param[in] end The ending value of the sequence (exclusive).
@@ -65,7 +94,7 @@ public:
void run() override;
private:
- NERangeKernel _kernel;
+ std::unique_ptr<NERangeKernel> _kernel;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NERANGE_H */
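Since the output length must match the generated sequence, a minimal usage sketch (element count and values are illustrative) looks like this:

    #include "arm_compute/runtime/NEON/functions/NERange.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void range_example()
    {
        Tensor out;
        // ceil((end - start) / step) = 5 elements: {0, 2, 4, 6, 8}; end itself is excluded.
        out.allocator()->init(TensorInfo(TensorShape(5U), 1, DataType::F32));

        NERange range;
        range.configure(&out, 0.f, 10.f, 2.f);

        out.allocator()->allocate();
        range.run();
    }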
diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h
index 3c7cc21929..5b8d8cdf2b 100644
--- a/arm_compute/runtime/NEON/functions/NEReduceMean.h
+++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,10 +24,8 @@
#ifndef ARM_COMPUTE_NEON_REDUCE_MEAN_H
#define ARM_COMPUTE_NEON_REDUCE_MEAN_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
@@ -41,8 +39,29 @@ class NEReduceMean : public IFunction
public:
/** Constructor */
NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReduceMean(const NEReduceMean &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReduceMean &operator=(const NEReduceMean &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReduceMean(NEReduceMean &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReduceMean &operator=(NEReduceMean &&) = delete;
+ /** Default destructor */
+ ~NEReduceMean();
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32
@@ -61,7 +80,8 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output);
+ static Status
+ validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output);
// Inherited methods overridden:
void run() override;
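A minimal configure/run sketch for the interface above; the 3-D shape and the choice of axis 1 are assumptions for illustration:

    #include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reduce_mean_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));
        // Mean over axis 1 with keep_dims = true leaves a singleton dimension behind.
        dst.allocator()->init(TensorInfo(TensorShape(8U, 1U, 2U), 1, DataType::F32));

        NEReduceMean reduce;
        reduce.configure(&src, Coordinates(1), true, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        reduce.run();
    }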
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index abda4159ba..f5391a6d0e 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,20 +25,19 @@
#define ARM_COMPUTE_NEREDUCTIONOPERATION_H
#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
class ITensor;
+class NEReductionOperationKernel;
-/** Basic function to simulate a reduction operation. This function calls the following NEON kernels:
+/** Basic function to simulate a reduction operation. This function calls the following kernels:
*
- * -# @ref NEFillBorderKernel
+ * -# @ref NEReshapeLayer
* -# @ref NEReductionOperationKernel
*
*/
@@ -47,19 +46,41 @@ class NEReductionOperation : public IFunction
public:
/** Default constructor */
NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperation(const NEReductionOperation &) = delete;
+ /** Default move constructor */
+ NEReductionOperation(NEReductionOperation &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperation &operator=(const NEReductionOperation &) = delete;
+ /** Default move assignment operator */
+ NEReductionOperation &operator=(NEReductionOperation &&) = default;
+ /** Default destructor */
+ ~NEReductionOperation();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0
- * @param[in] op Reduction operation to perform.
- * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |S32 |S32 |
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. (Written to only for border_size != 0)
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0
+ * @param[in] op Reduction operation to perform.
+ * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
*/
void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
/** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation.
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+ * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32.
* @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
* @param[in] axis Dimension along which to reduce. Supported reduction axis : 0
* @param[in] op Reduction operation to perform.
@@ -67,20 +88,23 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ unsigned int axis,
+ ReductionOperation op,
+ bool keep_dims = true);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group;
- NEReductionOperationKernel _reduction_kernel;
- NEFillBorderKernel _fill_border_kernel;
- NEReshapeLayerKernel _reshape_kernel;
- Tensor _output_internal;
- size_t _window_split;
- int _reduction_axis;
- bool _is_reshape_required;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEReductionOperationKernel> _reduction_kernel;
+ NEReshapeLayer _reshape;
+ Tensor _output_internal;
+ size_t _window_split;
+ int _reduction_axis;
+ bool _is_reshape_required;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEREDUCTIONOPERATION_H */
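For reference, a small usage sketch of the reworked function; the shape, the SUM operation and axis 0 are arbitrary choices for this example:

    #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reduction_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::F32)); // axis 0 collapsed, keep_dims = true

        NEReductionOperation reduction;
        reduction.configure(&src, &dst, 0, ReductionOperation::SUM, true);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        reduction.run();
    }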
diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h
deleted file mode 100644
index 05a7a8ffd6..0000000000
--- a/arm_compute/runtime/NEON/functions/NERemap.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREMAP_H
-#define ARM_COMPUTE_NEREMAP_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute remap. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NERemapKernel
- */
-class NERemap : public INESimpleFunction
-{
-public:
- /** Initialise the function's sources, destination, interpolation policy and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] map_x Map for X coordinates. Data type supported: F32.
- * @param[in] map_y Map for Y coordinates. Data type supported: F32.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
- * @param[in] border_mode Border mode to use on the input tensor.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output,
- InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEREMAP_H */
diff --git a/arm_compute/runtime/NEON/functions/NEReorderLayer.h b/arm_compute/runtime/NEON/functions/NEReorderLayer.h
new file mode 100644
index 0000000000..e3fa7b9c16
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEReorderLayer.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(__aarch64__)
+
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+class NEReorderKernel;
+/** Function to compute blocked reorder. */
+class NEReorderLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ NEReorderLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorderLayer(const NEReorderLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorderLayer &operator=(const NEReorderLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReorderLayer(NEReorderLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReorderLayer &operator=(NEReorderLayer &&) = delete;
+ /** Default destructor */
+ ~NEReorderLayer();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ *
+ * @param[in] input Source tensor. Data type supported: F32. Data layouts supported: NCHW.
+ * @param[out] output Destination tensor with the same dimensions, data type and data layout as @p input,
+ * except for the last dimension of the data layout, which must be a multiple of the blocking parameter ksize
+ * @param[in] input_wf WeightFormat of input.
+ * @param[in] output_wf WeightFormat of output.
+ */
+ void configure(const ITensor *input,
+ ITensor *output,
+ arm_compute::WeightFormat input_wf,
+ arm_compute::WeightFormat output_wf);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReorderLayer
+ *
+ * Similar to @ref NEReorderLayer::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ arm_compute::WeightFormat input_wf,
+ arm_compute::WeightFormat output_wf);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::unique_ptr<NEReorderKernel> _reorder_kernel; /**< Reorder layer kernel */
+};
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER */
+
+#endif // defined(__aarch64__)
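A validate()-level sketch of the new layer; the 2-D weight shape, the OHWI-to-OHWIo4 format pair, and the assumption that both dimensions are multiples of the blocking factor (4 for OHWIo4) are illustrative only:

    #if defined(__aarch64__)
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEReorderLayer.h"

    using namespace arm_compute;

    Status check_reorder()
    {
        // Shapes chosen so the blocked dimension satisfies the "multiple of ksize" constraint.
        const TensorInfo src(TensorShape(16U, 64U), 1, DataType::F32);
        const TensorInfo dst(TensorShape(16U, 64U), 1, DataType::F32);

        return NEReorderLayer::validate(&src, &dst, WeightFormat::OHWI, WeightFormat::OHWIo4);
    }
    #endif // __aarch64__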
diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
index 8ef7f8a1b2..0a7d824d10 100644
--- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,7 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEReorgLayerKernel */
class NEReorgLayer : public INESimpleFunctionNoBorder
@@ -38,6 +39,15 @@ class NEReorgLayer : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First tensor input. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] stride Stride to be used during data re-organization
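A small usage sketch for reorg; the NCHW shape and stride of 2 are placeholders, and the output shape follows the usual reorg rule (width and height divided by the stride, channels multiplied by stride squared):

    #include "arm_compute/runtime/NEON/functions/NEReorgLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reorg_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::F32));  // NCHW: W=8, H=8, C=4
        // Stride 2 moves 2x2 spatial blocks into channels: W,H halved, C grows 4x.
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 16U), 1, DataType::F32));

        NEReorgLayer reorg;
        reorg.configure(&src, &dst, 2);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        reorg.run();
    }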
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index d6643842d9..3e6e33f797 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,32 +25,61 @@
#define ARM_COMPUTE_NERESHAPELAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
+#include "arm_compute/runtime/Types.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
-/** Basic function to run @ref NEReshapeLayerKernel */
-class NEReshapeLayer : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuReshapeKernel */
+class NEReshapeLayer : public IFunction
{
public:
+ /** Default Constructor */
+ NEReshapeLayer();
+ /** Default Destructor */
+ ~NEReshapeLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReshapeLayer(const NEReshapeLayer &) = delete;
+ /** Default move constructor */
+ NEReshapeLayer(NEReshapeLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReshapeLayer &operator=(const NEReshapeLayer &) = delete;
+ /** Default move assignment operator */
+ NEReshapeLayer &operator=(NEReshapeLayer &&);
/** Initialise the kernel's inputs and outputs
*
- * @param[in] input First tensor input. Data type supported: All
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
+ * @param[in] input Input tensor. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer
*
- * @param[in] input First tensor info. Data type supported: All
+ * @param[in] input Input tensor info. Data type supported: All
* @param[in] output Output tensor info. Data type supported: Same as @p input
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NERESHAPELAYER_H */
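
The reshape function keeps its two-tensor configure() even though the implementation moved behind an Impl pointer. A minimal sketch, assuming the standard Tensor/TensorInfo allocation flow; the output only needs the same total element count:

    #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Reshape an 8x4 tensor into a flat 32-element tensor.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

        NEReshapeLayer reshape{};
        reshape.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        reshape.run();
        return 0;
    }
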
diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h
index ab5a5d0869..e03e415068 100644
--- a/arm_compute/runtime/NEON/functions/NEReverse.h
+++ b/arm_compute/runtime/NEON/functions/NEReverse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,16 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEREVERSE_H
-#define ARM_COMPUTE_NEREVERSE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEReverseKernel */
class NEReverse : public INESimpleFunctionNoBorder
@@ -38,20 +38,39 @@ class NEReverse : public INESimpleFunctionNoBorder
public:
/** Initialize the function
*
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |All |U32, S32 |All |
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
+     * @param[in]  use_inverted_axis Reverse the ACL axis indices convention; if true, the axis is interpreted as inverted_axis = (tensor_rank - 1) - axis
+     *
+     * @note The value of each axis must be in the range [-rank, rank)
+     * @note If the axis tensor contains duplicate values, subsequent duplicates are ignored, e.g. an axis array of [2, 2] has the same effect as [2].
+ *
+ * @deprecated Support for U32 in axis tensor will be removed in 24.02 release
+ *
*/
- void configure(const ITensor *input, ITensor *output, const ITensor *axis);
+ void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel
*
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
+     * @param[in] use_inverted_axis Reverse the ACL axis indices convention; if true, the axis is interpreted as inverted_axis = (tensor_rank - 1) - axis
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *axis,
+ const bool use_inverted_axis = false);
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEREVERSE_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
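
A minimal usage sketch of the extended reverse interface; the axis tensor uses S32 (U32 is deprecated per the note above) and the shapes are illustrative:

    #include "arm_compute/runtime/NEON/functions/NEReverse.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src{}, dst{}, axis{};
        src.allocator()->init(TensorInfo(TensorShape(4U, 2U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 2U, 3U), 1, DataType::F32));
        axis.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::S32));

        NEReverse reverse{};
        reverse.configure(&src, &dst, &axis, /* use_inverted_axis */ false);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        axis.allocator()->allocate();
        // ... fill src; write the dimension index to reverse (e.g. 0) into axis ...
        reverse.run();
        return 0;
    }
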
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index 5350d0646c..72dfa3bda4 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,65 +24,60 @@
#ifndef ARM_COMPUTE_NESCALEIMAGE_H
#define ARM_COMPUTE_NESCALEIMAGE_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/Tensor.h"
-#include <cstdint>
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEScaleKernel */
+/** Basic function to compute Scale */
class NEScale : public IFunction
{
public:
- /** Constructor
- *
- * Initialize NEScale
- */
+ /** Constructor */
NEScale();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScale(const NEScale &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEScale(NEScale &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScale &operator=(const NEScale &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEScale &operator=(NEScale &&) = delete;
+ /** Destructor */
+ ~NEScale();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
- * @param[in, out] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- */
- void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
- /** Initialize the function's source, destination, interpolation type and border_mode.
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |U8 |U8 |
+ * |S8 |S8 |
+ * |S16 |S16 |
*
- * @param[in, out] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for configuration
- */
- void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEScale
- *
- * @param[in] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
*
- * @return a status
+     * @note The S8 data type is supported only with the NHWC data layout, @p border_mode REPLICATE and @p policy BILINEAR
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode,
- PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
+ void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEScale
*
- * @param[in] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for validation
*
@@ -94,13 +89,8 @@ public:
void run() override;
private:
- Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */
- Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */
- Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */
- NEScaleKernel _scale_kernel; /**< Kernel to perform the scaling */
- NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */
- bool _use_padding; /**< Is padding used on the tensors */
- bool _align_corners; /**< Align corners of input and output */
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NESCALEIMAGE_H */
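
With the legacy overloads removed, all options now travel through ScaleKernelInfo. A minimal sketch, assuming its (policy, border_mode, ...) constructor with defaulted trailing arguments:

    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/runtime/NEON/functions/NEScale.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Upscale a 16x16 F32 image to 32x32 with bilinear interpolation.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 1U), 1, DataType::F32));

        NEScale scale{};
        scale.configure(&src, &dst, ScaleKernelInfo{InterpolationPolicy::BILINEAR, BorderMode::REPLICATE});

        src.allocator()->allocate();
        dst.allocator()->allocate();
        scale.run();
        return 0;
    }
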
diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h
deleted file mode 100644
index 6091121e03..0000000000
--- a/arm_compute/runtime/NEON/functions/NEScharr3x3.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESCHARR3x3_H
-#define ARM_COMPUTE_NESCHARR3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute scharr 3x3 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEScharr3x3Kernel
- *
- */
-class NEScharr3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16.
- * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NESCHARR3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h
index 6ac328080d..c8e5a204dd 100644
--- a/arm_compute/runtime/NEON/functions/NESelect.h
+++ b/arm_compute/runtime/NEON/functions/NESelect.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,19 +25,28 @@
#define ARM_COMPUTE_NESELECT_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NESelect */
-class NESelect : public INESimpleFunction
+class NESelect : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |U8 |All |All |All |
+ *
* @param[in] c Condition input tensor. Data types supported: U8.
* @param[in] x First input tensor. Data types supported: All.
* @param[in] y Second input tensor. Data types supported: Same as @p x
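
Select copies elements from x where the U8 condition is non-zero, and from y elsewhere. A minimal sketch assuming the (c, x, y, output) configure order implied by the parameter list:

    #include "arm_compute/runtime/NEON/functions/NESelect.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        const TensorShape shape(8U, 8U);
        Tensor c{}, x{}, y{}, dst{};
        c.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        x.allocator()->init(TensorInfo(shape, 1, DataType::F32));
        y.allocator()->init(TensorInfo(shape, 1, DataType::F32));
        dst.allocator()->init(TensorInfo(shape, 1, DataType::F32));

        NESelect select{};
        select.configure(&c, &x, &y, &dst);

        c.allocator()->allocate();
        x.allocator()->allocate();
        y.allocator()->allocate();
        dst.allocator()->allocate();
        select.run();
        return 0;
    }
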
diff --git a/arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h b/arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h
deleted file mode 100644
index a162b6513b..0000000000
--- a/arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H
-#define ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H
-
-#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-/** Basic interface for functions which have a single NEON GEMM wrapper kernel to run */
-class NESimpleAssemblyFunction : public IFunction
-{
-public:
- /** Constructor */
- NESimpleAssemblyFunction();
-
- /** Configure the function with the kernel to run
- *
- * @param[in] kernel GEMM Wrapper kernel configured and ready to run
- *
- * @note The kernel is expected to have a 1D window. The function will multi-thread this window across the X dimension.
- */
- void configure(std::unique_ptr<INEGEMMWrapperKernel> kernel);
-
- // Inherited methods overridden:
- void run() override final;
-
-protected:
- std::unique_ptr<INEGEMMWrapperKernel> _kernel; /**< Kernel to run */
-};
-} //namespace arm_compute
-#endif /*ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H */
diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h
index 834ec27a33..70a688d3b0 100644
--- a/arm_compute/runtime/NEON/functions/NESlice.h
+++ b/arm_compute/runtime/NEON/functions/NESlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,8 @@
#ifndef ARM_COMPUTE_NE_SLICE_H
#define ARM_COMPUTE_NE_SLICE_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
namespace arm_compute
{
@@ -32,11 +33,32 @@ namespace arm_compute
class ITensor;
/** Basic function to perform tensor slicing */
-class NESlice : public INESimpleFunctionNoBorder
+class NESlice : public IFunction
{
public:
+ /** Default Constructor */
+ NESlice();
+ /** Default Destructor */
+ ~NESlice();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESlice(const NESlice &) = delete;
+ /** Default move constructor */
+ NESlice(NESlice &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESlice &operator=(const NESlice &) = delete;
+ /** Default move assignment operator */
+ NESlice &operator=(NESlice &&);
+
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
* @note Start indices must be non-negative. 0 <= starts[i]
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
@@ -63,7 +85,54 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+
+namespace experimental
+{
+/** Basic function to perform tensor slicing */
+class NESlice : public INEOperator
+{
+public:
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Start indices must be non-negative. 0 <= starts[i]
+ * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+ * @note End indices are not inclusive unless negative.
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ */
+ void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NESlice
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Start indices must be non-negative. 0 <= starts[i]
+ * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+ * @note End indices are not inclusive unless negative.
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ *
+ * @return A status
+ */
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NE_SLICE_H */
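
A minimal sketch of the runtime (non-experimental) slice class; end indices are exclusive, as the notes above state, and the shapes are illustrative:

    #include "arm_compute/runtime/NEON/functions/NESlice.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // starts=(1,0), ends=(3,2) select a 2x2 region of a 4x4 tensor.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::F32));

        NESlice slice{};
        slice.configure(&src, &dst, Coordinates(1, 0), Coordinates(3, 2));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        slice.run();
        return 0;
    }
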
diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h
deleted file mode 100644
index 0cd633ec3a..0000000000
--- a/arm_compute/runtime/NEON/functions/NESobel3x3.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL3x3_H
-#define ARM_COMPUTE_NESOBEL3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute sobel 3x3 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NESobel3x3Kernel
- *
- */
-class NESobel3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NESOBEL3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h
deleted file mode 100644
index af52292359..0000000000
--- a/arm_compute/runtime/NEON/functions/NESobel5x5.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL5x5_H
-#define ARM_COMPUTE_NESOBEL5x5_H
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NESobel5x5HorKernel
- * -# @ref NESobel5x5VertKernel
- *
- */
-class NESobel5x5 : public IFunction
-{
-public:
- /** Default constructor */
- NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function memory group */
- NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
- NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */
- Tensor _tmp_x; /**< Temporary buffer for Sobel X */
- Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
- NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
-};
-}
-#endif /*ARM_COMPUTE_NESOBEL5x5_H */
diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h
deleted file mode 100644
index e9098880f5..0000000000
--- a/arm_compute/runtime/NEON/functions/NESobel7x7.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL7x7_H
-#define ARM_COMPUTE_NESOBEL7x7_H
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute sobel 7x7 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NESobel7x7HorKernel
- * -# @ref NESobel7x7VertKernel
- *
- */
-class NESobel7x7 : public IFunction
-{
-public:
- /** Default constructor */
- NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32.
- * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function memory group */
- NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
- NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */
- Tensor _tmp_x; /**< Temporary buffer for Sobel X */
- Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
- NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
-};
-}
-#endif /*ARM_COMPUTE_NESOBEL7x7_H */
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index b80ceaf25c..1787de6237 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,31 +24,18 @@
#ifndef ARM_COMPUTE_NESOFTMAXLAYER_H
#define ARM_COMPUTE_NESOFTMAXLAYER_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
- *
- * Softmax is calculated by :
- * @f[ out = \frac{e^{x - max(x)}}{\sum{e^{x - max(x)}}} @f]
- *
- * Log Softmax is calculated by :
- * @f[ out = (x - max(x)) - \sum{e^{x - max(x)}} @f]
- *
- * This function runs the following kernels:
- * -# @ref NEFillBorderKernel
- * -# @ref NELogits1DMaxKernel
- * -# @ref NELogits1DSoftmaxKernel
- */
+/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. */
template <bool IS_LOG = false>
class NESoftmaxLayerGeneric : public IFunction
{
@@ -58,69 +45,53 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NESoftmaxLayerGeneric(const NESoftmaxLayerGeneric &) = delete;
/** Default move constructor */
- NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&) = default;
+ NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete;
/** Default move assignment operator */
- NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default;
+ NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&);
+ /** Default destructor */
+ ~NESoftmaxLayerGeneric();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a
- * multiple of the internal processing block size, @ref NEFillBorderKernel replicates the
+ * multiple of the internal processing block size, @ref NEFillBorder replicates the
* last value of each row to the nearest multiple.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] beta (Optional) A scaling factor for the exponent.
- * @param[in] axis (Optional) Reduction axis. Defaults to -1.
- * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions).
- * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image,
- * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*/
- void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = -1);
+ void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor info. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent.
- * @param[in] axis (Optional) Reduction axis. Defaults to -1.
- * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions).
- * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image,
- * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = -1);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = 0);
// Inherited methods overridden:
void run() override;
private:
- /** Utility method to configure the kernels needed to flatten the input
- * tensor.
- *
- * @note This function changes the internal state of this class. In particular,
- * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and
- * @p _output_flat
- *
- * @param[in] input Original source tensor.
- * @param[in] output Original destination tensor.
- * @param[in] axis (Optional) Reduction axis. Defaults to -1.
- * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions).
- * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image,
- * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
- */
- void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t axis);
-
- MemoryGroup _memory_group;
- NELogits1DMaxKernel _max_kernel;
- NELogits1DSoftmaxKernel<IS_LOG> _softmax_kernel;
- std::unique_ptr<INEKernel> _flat_or_reshape_kernel_ptr;
- NEFillBorderKernel _fill_border_kernel;
- NEReshapeLayerKernel _reshape_kernel;
- Tensor _max;
- Tensor _tmp;
- Tensor _input_flattened;
- Tensor _output_flattened;
- bool _needs_flattening;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
using NESoftmaxLayer = NESoftmaxLayerGeneric<false>;
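
Note that the axis semantics changed from a squash-style reduction axis defaulting to -1 to a single dimension defaulting to 0. A minimal sketch with the new convention, using illustrative shapes:

    #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // For a 10x5 input and axis = 0, softmax runs over 5 vectors of size 10.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(10U, 5U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(10U, 5U), 1, DataType::F32));

        NESoftmaxLayer softmax{};
        softmax.configure(&src, &dst, /* beta */ 1.0f, /* axis */ 0);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        softmax.run();
        return 0;
    }
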
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index 75fa50c1b0..5dee61a4a8 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,19 +24,21 @@
#ifndef ARM_COMPUTE_NESPACETOBATCHLAYER_H
#define ARM_COMPUTE_NESPACETOBATCHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NESpaceToBatchLayerKernel;
+class NEFill;
-/** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions:
+/** Basic function to spatial divide a tensor. This function calls the following kernels/functions:
*
- * -# @ref NEMemsetKernel
+ * -# @ref NEFill
* -# @ref NESpaceToBatchLayerKernel
*/
class NESpaceToBatchLayer : public IFunction
@@ -53,12 +55,21 @@ public:
/** Allow instances of this class to be moved */
NESpaceToBatchLayer &operator=(NESpaceToBatchLayer &&) = default;
/** Default destructor */
- virtual ~NESpaceToBatchLayer() = default;
+ ~NESpaceToBatchLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:---------|:---------|:---------|:---------|
+ * |All |S32 |S32 |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
+ * @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32
* @param[out] output Tensor output. Data types supported: same as @p input
*/
void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output);
@@ -67,41 +78,54 @@ public:
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
+ * @param[in] padding_left The padding at the beginning of every dimension of the output tensor.
+ * @param[in] padding_right The padding at the end of every dimension of the output tensor.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
+ void configure(const ITensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32
- * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32
+ * @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32
* @param[in] output Tensor output info. Data types supported: same as @p input
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *block_shape,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings)
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
+ * @param[in] padding_left The padding at the beginning of every dimension of the output tensor.
+ * @param[in] padding_right The padding at the end of every dimension of the output tensor.
* @param[in] output Tensor output info. Data types supported: same as @p input
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ const ITensorInfo *output);
// Inherited methods overridden:
void run() override;
private:
- NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- NEMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
+ std::unique_ptr<NESpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
+ std::unique_ptr<NEFill> _fill_f; /**< Fill function to run */
+ bool _has_padding; /**< Flag to check if the output has padding */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NESPACETOBATCHLAYER_H */
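
A minimal sketch of the static-shape overload, with a 2x2 block and no padding; shapes are illustrative:

    #include "arm_compute/core/Size2D.h"
    #include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // 4x4x1x1 input, 2x2 block, no padding -> 2x2x1x4: spatial blocks move to batch.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 1U, 4U), 1, DataType::F32));

        NESpaceToBatchLayer s2b{};
        s2b.configure(&src, 2, 2, Size2D(0, 0), Size2D(0, 0), &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        s2b.run();
        return 0;
    }
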
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
index 6a7a9c83a1..1820cb8f6b 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,20 +24,18 @@
#ifndef ARM_COMPUTE_NESPACETODEPTHLAYER_H
#define ARM_COMPUTE_NESPACETODEPTHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NESpaceToDepthLayerKernel;
-/** This function calls the following NEON kernels/functions:
- *
- * -# @ref NESpaceToDepthLayerKernel
- */
+/** Basic function to run @ref NESpaceToDepthLayerKernel. */
class NESpaceToDepthLayer : public IFunction
{
public:
@@ -52,9 +50,18 @@ public:
/** Allow instances of this class to be moved */
NESpaceToDepthLayer &operator=(NESpaceToDepthLayer &&) = default;
/** Default destructor */
- virtual ~NESpaceToDepthLayer() = default;
+ ~NESpaceToDepthLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value
@@ -74,7 +81,7 @@ public:
void run() override;
private:
- NESpaceToDepthLayerKernel _space_to_depth_kernel; /**< SpaceToDepth kernel to run */
+ std::unique_ptr<NESpaceToDepthLayerKernel> _space_to_depth_kernel; /**< SpaceToDepth kernel to run */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NESPACETODEPTHLAYER_H */
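
A minimal sketch of the space-to-depth call, with an illustrative 4x4x1 input and block shape 2:

    #include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // 4x4x1 input, block_shape 2 -> 2x2x4: spatial blocks move to channels.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 4U), 1, DataType::F32));

        NESpaceToDepthLayer s2d{};
        s2d.configure(&src, &dst, 2); // block_shape = 2

        src.allocator()->allocate();
        dst.allocator()->allocate();
        s2d.run();
        return 0;
    }
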
diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h
index 69aef793d5..36358a7094 100644
--- a/arm_compute/runtime/NEON/functions/NESplit.h
+++ b/arm_compute/runtime/NEON/functions/NESplit.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,6 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
-
#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NESlice.h"
@@ -40,6 +39,18 @@ namespace arm_compute
class NESplit : public CPPSplit<NESlice>
{
public:
+ /** NESplit
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
+ */
+
// Inherited methods overridden:
void run() override;
};
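
NESplit inherits its configure() from CPPSplit<NESlice>. A minimal sketch assuming CPPSplit's (input, outputs, axis) signature, which is not shown in this patch:

    #include "arm_compute/runtime/NEON/functions/NESplit.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <vector>

    using namespace arm_compute;

    int main()
    {
        // Split an 8x4 tensor along axis 0 into two 4x4 halves.
        Tensor src{}, out0{}, out1{};
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        out0.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        out1.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));

        std::vector<ITensor *> outputs{&out0, &out1};

        NESplit split{};
        split.configure(&src, outputs, 0); // (input, outputs, axis) assumed from CPPSplit

        src.allocator()->allocate();
        out0.allocator()->allocate();
        out1.allocator()->allocate();
        split.run();
        return 0;
    }
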
diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h
index 9288035060..98dacde0c1 100644
--- a/arm_compute/runtime/NEON/functions/NEStackLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,20 +21,20 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NESTACKLAYER_H
-#define ARM_COMPUTE_NESTACKLAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h"
-
#include <memory>
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NEStackLayerKernel;
/** Basic function to stack tensors along an axis. This function calls the following kernel:
*
@@ -46,8 +46,26 @@ class NEStackLayer : public IFunction
public:
/** Default constructor */
NEStackLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStackLayer(const NEStackLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStackLayer &operator=(const NEStackLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEStackLayer(NEStackLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEStackLayer &operator=(NEStackLayer &&) = delete;
+ /** Default destructor */
+ ~NEStackLayer();
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported input tensor rank: up to 4
*
* @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: All
@@ -73,9 +91,8 @@ public:
void run() override;
private:
- std::vector<ITensor *> _input;
- std::vector<NEStackLayerKernel> _stack_kernels;
- unsigned int _num_inputs;
+ std::unique_ptr<NEStackLayerKernel> _stack_kernel;
+ bool _is_prepared;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESTACKLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
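
A minimal sketch of stacking two equally shaped tensors; the (inputs, axis, output) argument order is an assumption, as the configure() signature is not shown in this hunk:

    #include "arm_compute/runtime/NEON/functions/NEStackLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <vector>

    using namespace arm_compute;

    int main()
    {
        // Stack two 4x4 tensors along a new axis 0: the output gains a
        // dimension of size 2 at index 0.
        Tensor a{}, b{}, dst{};
        a.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(2U, 4U, 4U), 1, DataType::F32));

        std::vector<ITensor *> inputs{&a, &b};

        NEStackLayer stack{};
        stack.configure(inputs, 0, &dst); // (inputs, axis, output) assumed order

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();
        stack.run();
        return 0;
    }
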
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
index 6d5e6392f5..fa1113ffec 100644
--- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h
+++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,8 @@
#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_H
#define ARM_COMPUTE_NE_STRIDED_SLICE_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
namespace arm_compute
{
@@ -32,11 +33,32 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NEStridedSliceKernel */
-class NEStridedSlice : public INESimpleFunction
+class NEStridedSlice : public IFunction
{
public:
+ /** Default Constructor */
+ NEStridedSlice();
+ /** Default Destructor */
+ ~NEStridedSlice();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSlice(const NEStridedSlice &) = delete;
+ /** Default move constructor */
+ NEStridedSlice(NEStridedSlice &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSlice &operator=(const NEStridedSlice &) = delete;
+ /** Default move assignment operator */
+ NEStridedSlice &operator=(NEStridedSlice &&);
+
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: All
@@ -49,9 +71,74 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const ITensor *input, ITensor *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ void configure(const ITensor *input,
+ ITensor *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved.
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+
+namespace experimental
+{
+/** Basic function to run @ref NEStridedSliceKernel */
+class NEStridedSlice : public INEOperator
+{
+public:
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved.
+ */
+ void configure(const ITensorInfo *input,
+ ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice
*
@@ -67,9 +154,15 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NE_STRIDED_SLICE_H */
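
For reference, a minimal usage sketch of the runtime NEStridedSlice declared above; this is not taken from the patch itself, the tensor shape and slice parameters are illustrative, and it assumes the library's usual Tensor/allocator workflow:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative 4D source tensor (W=8, H=8, C=2, N=1), F32.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 2U, 1U), 1, DataType::F32));

    // Take every second element along the first two dimensions and the full
    // range of the remaining ones. Lengths equal rank(input), as documented.
    const Coordinates starts(0, 0, 0, 0);
    const Coordinates ends(8, 8, 2, 1);
    const BiStrides   strides(2, 2, 1, 1);

    NEStridedSlice slice;
    slice.configure(&src, &dst, starts, ends, strides);

    // configure() infers the destination shape, so dst is allocated after it.
    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src ...
    slice.run();
    return 0;
}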
diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h
deleted file mode 100644
index b0685afd5b..0000000000
--- a/arm_compute/runtime/NEON/functions/NETableLookup.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETABLELOOKUP_H
-#define ARM_COMPUTE_NETABLELOOKUP_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ILut;
-
-/** Basic function to run @ref NETableLookupKernel */
-class NETableLookup : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input First tensor input. Data types supported: U8/S16
- * @param[in] lut Input lookup table.
- * @param[out] output Output tensor. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ILut *lut, ITensor *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETABLELOOKUP_H */
diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h
deleted file mode 100644
index c955283e9e..0000000000
--- a/arm_compute/runtime/NEON/functions/NEThreshold.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETHRESHOLD_H
-#define ARM_COMPUTE_NETHRESHOLD_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEThresholdKernel */
-class NEThreshold : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the function's source, destination, thresholds and threshold type
- *
- * @param[in] input First tensor input. Data type supported: U8.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold
- * @param[in] false_value Value to assign when the condition is false
- * @param[in] true_value value to assign when the condition is true
- * @param[in] type Thresholding type. Can either be BINARY or RANGE.
- * @param[in] upper Upper threshold. Only used with RANGE thresholding
- */
- void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0,
- ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETHRESHOLD_H */
diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h
index 14d4f221f1..001a0a4128 100644
--- a/arm_compute/runtime/NEON/functions/NETile.h
+++ b/arm_compute/runtime/NEON/functions/NETile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,13 +24,13 @@
#ifndef ARM_COMPUTE_NETILE_H
#define ARM_COMPUTE_NETILE_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NETileKernel */
class NETile : public INESimpleFunctionNoBorder
@@ -38,6 +38,14 @@ class NETile : public INESimpleFunctionNoBorder
public:
/** Set the source and destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data type supported: All.
* @param[out] output Destination tensor. Same as @p input
* @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
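
For reference, a minimal NETile sketch under the same assumptions (illustrative shapes; Multiples is the library's vector of per-dimension replication factors):

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NETile.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(3U, 2U), 1, DataType::F32));

    // Replicate the tensor twice along dimension 0 and three times along
    // dimension 1: the (3, 2) input becomes a (6, 6) output.
    NETile tile;
    tile.configure(&src, &dst, Multiples{2, 3});

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src ...
    tile.run();
    return 0;
}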
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 03c90e5b28..5d2d1f1b01 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,22 +25,42 @@
#define ARM_COMPUTE_NETRANSPOSE_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel:
- *
- * -# @ref NETransposeKernel
- *
- */
-class NETranspose : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuTransposeKernel */
+class NETranspose : public IFunction
{
public:
+ /** Default Constructor */
+ NETranspose();
+ /** Default Destructor */
+ ~NETranspose();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETranspose(const NETranspose &) = delete;
+ /** Default move constructor */
+ NETranspose(NETranspose &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETranspose &operator=(const NETranspose &) = delete;
+ /** Default move assignment operator */
+ NETranspose &operator=(NETranspose &&) = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
@@ -53,7 +73,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-
#endif /* ARM_COMPUTE_NETRANSPOSE_H */
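
For reference, a sketch of the validate-then-configure pattern with the reworked NETranspose (illustrative shapes; real code would check the returned Status message on failure):

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // An illustrative (4, 3) matrix transposes to (3, 4).
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(3U, 4U), 1, DataType::F32));

    // Check the configuration before committing any resources.
    const Status st = NETranspose::validate(src.info(), dst.info());
    if(st.error_code() != ErrorCode::OK)
    {
        return 1;
    }

    NETranspose transpose;
    transpose.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src ...
    transpose.run();
    return 0;
}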
diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h
index dbb04f08e5..e1af96d08d 100644
--- a/arm_compute/runtime/NEON/functions/NEUnstack.h
+++ b/arm_compute/runtime/NEON/functions/NEUnstack.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,6 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include <memory>
@@ -45,10 +44,28 @@ class NEUnstack : public IFunction
public:
/** Default constructor */
NEUnstack();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUnstack(const NEUnstack &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUnstack &operator=(const NEUnstack &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEUnstack(NEUnstack &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEUnstack &operator=(NEUnstack &&) = delete;
+ /** Default destructor */
+ ~NEUnstack() = default;
/** Set the input, output and unstacking axis.
*
- * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
+ * @param[in] input A tensor to be unstacked. Data type supported: All.
+ * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
* Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
* @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
*
@@ -56,8 +73,8 @@ public:
void configure(const ITensor *input, const std::vector<ITensor *> &output_vector, int axis);
/** Static function to check if given info will lead to a valid configuration of @ref NEUnstack
*
- * @param[in] input Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[in] output_vector Vector of output tensors' info. Data types supported: Same as @p input.
+ * @param[in] input Input tensor info. Data type supported: All.
+ * @param[in] output_vector Vector of output tensors' info. Data types supported: same as @p input.
* @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
*
* @return a status
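
For reference, a minimal NEUnstack sketch matching the interface above (illustrative shapes; the size of the output vector selects the number of slices):

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEUnstack.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

int main()
{
    // A (4, 2, 3) tensor unstacked along axis 2 yields three (4, 2) tensors.
    Tensor src;
    src.allocator()->init(TensorInfo(TensorShape(4U, 2U, 3U), 1, DataType::F32));

    Tensor out0, out1, out2;
    std::vector<ITensor *> outputs{&out0, &out1, &out2};

    NEUnstack unstack;
    unstack.configure(&src, outputs, 2);

    src.allocator()->allocate();
    out0.allocator()->allocate();
    out1.allocator()->allocate();
    out2.allocator()->allocate();

    // ... fill src ...
    unstack.run();
    return 0;
}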
diff --git a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h
deleted file mode 100644
index ff465e54a0..0000000000
--- a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H
-#define ARM_COMPUTE_NEUPSAMPLELAYER_H
-
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/Tensor.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Function to run upsample layer */
-class NEUpsampleLayer : public IFunction
-{
-public:
- /** Constructor */
- NEUpsampleLayer();
- /** Set the input output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
- *
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &info,
- const InterpolationPolicy &policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayer
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[out] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info,
- const InterpolationPolicy &policy);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEUpsampleLayerKernel _kernel;
- DataLayout _data_layout;
-};
-} // arm_compute
-#endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h
deleted file mode 100644
index 768ef0c6d3..0000000000
--- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWARPAFFINE_H
-#define ARM_COMPUTE_NEWARPAFFINE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEWarpAffineKernel */
-class NEWarpAffine : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] matrix The perspective matrix. Must be 2x3 of type float.
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEWARPAFFINE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
deleted file mode 100644
index 66fb9acc3f..0000000000
--- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2016-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWARPPERSPECTIVE_H
-#define ARM_COMPUTE_NEWARPPERSPECTIVE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEWarpPerspectiveKernel */
-class NEWarpPerspective : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEWARPPERSPECTIVE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 4a8fe61614..6caa2aeb59 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,18 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -42,11 +36,11 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
- * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
- * -# @ref NEWinogradLayerTransformInputKernel
- * -# @ref NEWinogradLayerTransformOutputKernel
- * -# @ref NEGEMMAssemblyDispatch
+/** Basic function to simulate a convolution layer. This function calls the following kernels:
+ *
+ * -# @ref cpu::CpuWinogradConv2dTransformInputKernel
+ * -# @ref cpu::CpuWinogradConv2dTransformOutputKernel
+ * -# @ref cpu::CpuGemmAssemblyDispatch
* -# @ref CPPPermute (three times: weights, input and output)
*
* @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true
@@ -56,14 +50,35 @@ class NEWinogradConvolutionLayer : public IFunction
public:
/** Constructor */
NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = default;
+ /** Destructor */
+ ~NEWinogradConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
+ * Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for F32
+ * -> 3x3 for F16
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -72,62 +87,35 @@ public:
 * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
 * available, which can also introduce a drop in accuracy. Default is false
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- bool enable_fast_math = false);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
void prepare() override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
+ /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradConvolutionLayer
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
- * available which may introduce a drop of accuracy as well. Default is false
+ * Similar to @ref NEWinogradConvolutionLayer::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_function;
- std::unique_ptr<INEKernel> _transform_input_kernel;
- std::unique_ptr<INEKernel> _transform_output_kernel;
- std::unique_ptr<INEKernel> _transform_weights_kernel;
- NEActivationLayer _activationlayer_function;
-
- CPPPermute _permute_input;
- CPPPermute _permute_weights;
- CPPPermute _permute_output;
- Tensor _input_transformed;
- Tensor _output_transformed;
- Tensor _input_workspace;
- Tensor _output_workspace;
- Tensor _kernel_storage;
- Tensor _input_nhwc;
- Tensor _output_nhwc;
- Tensor _weights_hwio;
- const ITensor *_input;
- const ITensor *_weights;
- ITensor *_output;
- bool _is_prepared;
- bool _is_activationlayer_enabled;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
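
For reference, a minimal sketch of a 3x3, unit-stride Winograd convolution with a fused ReLU, matching the configure() contract above (illustrative NCHW shapes; real code would also fill the tensors):

#include "arm_compute/core/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Input (W=56, H=56, IFM=16, N=1), 3x3 weights [3, 3, IFM=16, OFM=32],
    // shared biases [32]. Unit strides with padding 1 keep the 56x56 spatial size.
    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(56U, 56U, 16U, 1U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

    NEWinogradConvolutionLayer conv; // default (null) memory manager
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1),
                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate(); // shape inferred by configure()

    // ... fill src/weights/biases ...

    conv.prepare(); // one-off weight transform; otherwise done lazily by run()
    conv.run();
    return 0;
}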
diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
deleted file mode 100644
index 5e0c34b9b1..0000000000
--- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEYOLOLAYER_H
-#define ARM_COMPUTE_NEYOLOLAYER_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEYOLOLayerKernel */
-class NEYOLOLayer : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer parameters.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- */
- void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayer
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEYOLOLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
deleted file mode 100644
index f16bb46d35..0000000000
--- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-namespace arm_compute
-{
-/** Depthwise convolution assembly kernel glue */
-class NEDepthwiseConvolutionAssemblyDispatch : public IFunction
-{
-public:
- /** Default constructor
- *
- * @param[in,out] memory_manager Memory manager to use
- */
- NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move constructor */
- NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move assignment operator */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Default destructor */
- ~NEDepthwiseConvolutionAssemblyDispatch();
- /** Initialize the function's source, destination, kernels and border_size.
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return An error status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Check if the optimized kernel can be used for the given kernel sizes and strides
- *
- * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC
- *
- * @param[in] input Input tensor info.
- * @param[in] weights Weights tensor info.
- * @param[in] conv_info Convolution layer metadata.
- * @param[in] depth_multiplier (Optional) Depth multiplier to be used.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed.
- */
- static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1));
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- struct LocalImpl;
-
-private:
- MemoryGroup _memory_group;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_bias;
- ITensor *_output;
- Tensor _packed_weights;
- Tensor _workspace;
- bool _is_prepared;
- std::unique_ptr<LocalImpl> _pImpl;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */