Diffstat (limited to 'arm_compute/runtime/NEON/functions')
-rw-r--r--  arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h | 65
-rw-r--r--  arm_compute/runtime/NEON/functions/NEAccumulate.h | 122
-rw-r--r--  arm_compute/runtime/NEON/functions/NEActivationLayer.h | 63
-rw-r--r--  arm_compute/runtime/NEON/functions/NEAddMulAdd.h | 115
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h | 24
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArithmeticAddition.h | 104
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h | 97
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h | 34
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h | 33
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h | 27
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBox3x3.h | 60
-rw-r--r--  arm_compute/runtime/NEON/functions/NECannyEdge.h | 106
-rw-r--r--  arm_compute/runtime/NEON/functions/NECast.h | 55
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelCombine.h | 62
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelExtract.h | 60
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NECol2Im.h | 61
-rw-r--r--  arm_compute/runtime/NEON/functions/NEColorConvert.h | 71
-rw-r--r--  arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h | 63
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConcatenateLayer.h | 93
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConv3D.h | 100
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h | 71
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolution.h | 178
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolutionLayer.h | 100
-rw-r--r--  arm_compute/runtime/NEON/functions/NECopy.h | 41
-rw-r--r--  arm_compute/runtime/NEON/functions/NECropResize.h | 29
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h | 91
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h | 60
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h | 33
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 155
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDequantizationLayer.h | 42
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDerivative.h | 82
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h | 34
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDilate.h | 57
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h | 55
-rw-r--r--  arm_compute/runtime/NEON/functions/NEElementwiseOperations.h | 356
-rw-r--r--  arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h | 167
-rw-r--r--  arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h | 89
-rw-r--r--  arm_compute/runtime/NEON/functions/NEErode.h | 57
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT1D.h | 15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT2D.h | 15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h | 71
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFastCorners.h | 96
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFill.h | 37
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFillBorder.h | 17
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFlattenLayer.h | 34
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFloor.h | 40
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 179
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h | 39
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMM.h | 104
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h | 125
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConv2d.h | 45
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 366
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h | 49
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 123
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h | 258
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h | 72
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGather.h | 21
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGaussian3x3.h | 57
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 88
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGaussianPyramid.h | 154
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h | 57
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGDescriptor.h | 90
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGDetector.h | 73
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGGradient.h | 88
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h | 119
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHarrisCorners.h | 119
-rw-r--r--  arm_compute/runtime/NEON/functions/NEHistogram.h | 80
-rw-r--r--  arm_compute/runtime/NEON/functions/NEIm2Col.h | 97
-rw-r--r--  arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h | 20
-rw-r--r--  arm_compute/runtime/NEON/functions/NEIntegralImage.h | 61
-rw-r--r--  arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h | 14
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayer.h | 69
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h | 107
-rw-r--r--  arm_compute/runtime/NEON/functions/NELaplacianPyramid.h | 98
-rw-r--r--  arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h | 104
-rw-r--r--  arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h | 111
-rw-r--r--  arm_compute/runtime/NEON/functions/NELogical.h | 54
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMagnitude.h | 64
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMatMul.h | 145
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h | 35
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 77
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMedian3x3.h | 58
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMinMaxLocation.h | 86
-rw-r--r--  arm_compute/runtime/NEON/functions/NENonLinearFilter.h | 64
-rw-r--r--  arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h | 58
-rw-r--r--  arm_compute/runtime/NEON/functions/NENormalizationLayer.h | 30
-rw-r--r--  arm_compute/runtime/NEON/functions/NEOpticalFlow.h | 108
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPReluLayer.h | 46
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPadLayer.h | 40
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPermute.h | 41
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPhase.h | 53
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h | 162
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPooling3dLayer.h (renamed from arm_compute/runtime/NEON/functions/NEUpsampleLayer.h) | 77
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPoolingLayer.h | 43
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h | 16
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 381
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQuantizationLayer.h | 46
-rw-r--r--  arm_compute/runtime/NEON/functions/NERNNLayer.h | 49
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIAlignLayer.h | 29
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h | 44
-rw-r--r--  arm_compute/runtime/NEON/functions/NERange.h | 18
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReduceMean.h | 26
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReductionOperation.h | 28
-rw-r--r--  arm_compute/runtime/NEON/functions/NERemap.h | 63
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReorderLayer.h | 94
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReorgLayer.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReshapeLayer.h | 48
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReverse.h | 46
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScale.h | 56
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScharr3x3.h | 61
-rw-r--r--  arm_compute/runtime/NEON/functions/NESelect.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NESlice.h | 78
-rw-r--r--  arm_compute/runtime/NEON/functions/NESobel3x3.h | 61
-rw-r--r--  arm_compute/runtime/NEON/functions/NESobel5x5.h | 92
-rw-r--r--  arm_compute/runtime/NEON/functions/NESobel7x7.h | 92
-rw-r--r--  arm_compute/runtime/NEON/functions/NESoftmaxLayer.h | 59
-rw-r--r--  arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h | 40
-rw-r--r--  arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h | 16
-rw-r--r--  arm_compute/runtime/NEON/functions/NESplit.h | 15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStackLayer.h | 21
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStridedSlice.h | 112
-rw-r--r--  arm_compute/runtime/NEON/functions/NETableLookup.h | 47
-rw-r--r--  arm_compute/runtime/NEON/functions/NEThreshold.h | 78
-rw-r--r--  arm_compute/runtime/NEON/functions/NETile.h | 13
-rw-r--r--  arm_compute/runtime/NEON/functions/NETranspose.h | 43
-rw-r--r--  arm_compute/runtime/NEON/functions/NEUnstack.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWarpAffine.h | 57
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWarpPerspective.h | 56
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h | 120
-rw-r--r--  arm_compute/runtime/NEON/functions/NEYOLOLayer.h | 64
-rw-r--r--  arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h | 123
137 files changed, 3263 insertions, 6689 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h
deleted file mode 100644
index f00b144475..0000000000
--- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H
-#define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEAbsoluteDifferenceKernel
- *
- * @note The image data type for the inputs must be U8 or S16
- * @note The function calculates the absolute difference also when the 2 inputs have different image data types
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEAbsoluteDifference : public INESimpleFunctionNoBorder
-{
-public:
- /** Default constructor */
- NEAbsoluteDifference() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAbsoluteDifference(const NEAbsoluteDifference &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAbsoluteDifference &operator=(const NEAbsoluteDifference &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAbsoluteDifference(NEAbsoluteDifference &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAbsoluteDifference &operator=(NEAbsoluteDifference &&) = delete;
- /** Default destructor */
- ~NEAbsoluteDifference();
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-};
-}
-#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h
deleted file mode 100644
index 1881411880..0000000000
--- a/arm_compute/runtime/NEON/functions/NEAccumulate.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEACCUMULATE_H
-#define ARM_COMPUTE_NEACCUMULATE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEAccumulateKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEAccumulate : public INESimpleFunctionNoBorder
-{
-public:
- /** Default constructor */
- NEAccumulate() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAccumulate(const NEAccumulate &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAccumulate &operator=(const NEAccumulate &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAccumulate(NEAccumulate &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAccumulate &operator=(NEAccumulate &&) = delete;
- /** Default destructor */
- ~NEAccumulate();
- /** Set the input and accumulation tensors
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, ITensor *output);
-};
-
-/** Basic function to run @ref NEAccumulateWeightedKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEAccumulateWeighted : public INESimpleFunctionNoBorder
-{
-public:
- /** Default constructor */
- NEAccumulateWeighted() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAccumulateWeighted(const NEAccumulateWeighted &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAccumulateWeighted &operator=(const NEAccumulateWeighted &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAccumulateWeighted(NEAccumulateWeighted &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAccumulateWeighted &operator=(NEAccumulateWeighted &&) = delete;
- /** Default destructor */
- ~NEAccumulateWeighted();
- /** Set the input and accumulation tensors, and the scale value
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] alpha The input scalar value with a value in the range of [0, 1.0]
- * @param[in,out] output Accumulated tensor. Data type supported: U8.
- * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
- */
- void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false);
-};
-
-/** Basic function to run @ref NEAccumulateSquaredKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEAccumulateSquared : public INESimpleFunctionNoBorder
-{
-public:
- /** Default constructor */
- NEAccumulateSquared() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAccumulateSquared(const NEAccumulateSquared &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAccumulateSquared &operator=(const NEAccumulateSquared &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAccumulateSquared(NEAccumulateSquared &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEAccumulateSquared &operator=(NEAccumulateSquared &&) = delete;
- /** Default destructor */
- ~NEAccumulateSquared();
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] shift The input with a value in the range of [0, 15]
- * @param[in,out] output Accumulated tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, uint32_t shift, ITensor *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACCUMULATE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 3f410fcd8c..5584fdc783 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,17 +24,20 @@
#ifndef ARM_COMPUTE_NEACTIVATIONLAYER_H
#define ARM_COMPUTE_NEACTIVATIONLAYER_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INEOperator.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IRuntimeContext.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEActivationLayerKernel
+/** Basic function to run @ref cpu::kernels::CpuActivationKernel
*
* @note The function simulates an activation layer with the specified activation function.
*/
@@ -59,6 +62,18 @@ public:
/** [NEActivationLayer snippet] **/
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
@@ -86,43 +101,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace experimental
-{
-/** Basic function to run @ref NEActivationLayerKernel */
-class NEActivationLayer : public INEOperator
-{
-public:
- /** Constructor */
- NEActivationLayer() = default;
- /** Prevent instances of this class from being copied */
- NEActivationLayer(const NEActivationLayer &) = delete;
- /** Default move constructor */
- NEActivationLayer(NEActivationLayer &&) = default;
- /** Prevent instances of this class from being copied */
- NEActivationLayer &operator=(const NEActivationLayer &) = delete;
- /** Default move assignment operator */
- NEActivationLayer &operator=(NEActivationLayer &&) = default;
- /** Destructor */
- ~NEActivationLayer();
-
- /** Set the input and output tensor.
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor info. Data type supported: same as @p input
- * @param[in] activation_info Activation layer parameters.
- */
- void configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
-};
-} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */
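For reference (not part of the patch), a minimal usage sketch of the reworked function, exercising the in-place path described in the new documentation; the tensor shape and activation choice are illustrative:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative F32 tensor; init() sets the metadata, allocate() reserves the buffer.
    Tensor src;
    src.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    src.allocator()->allocate();

    // Passing nullptr as the output runs the activation in-place, per the note above.
    NEActivationLayer act;
    act.configure(&src, nullptr,
                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f));
    act.run();
    return 0;
}
```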
diff --git a/arm_compute/runtime/NEON/functions/NEAddMulAdd.h b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h
new file mode 100644
index 0000000000..6c65c055dd
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
+#define ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+class ActivationLayerInfo;
+
+/** Function to compute Add+Mul+Add fused operation */
+class NEAddMulAdd : public IFunction
+{
+public:
+ /** Constructor */
+ NEAddMulAdd(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAddMulAdd(const NEAddMulAdd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAddMulAdd(NEAddMulAdd &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAddMulAdd &operator=(const NEAddMulAdd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAddMulAdd &operator=(NEAddMulAdd &&) = delete;
+ /** Destructor */
+ ~NEAddMulAdd();
+ /** Initialize the function's inputs and outputs.
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * Valid data type configurations:
+ * |input1 |input2 |bn_mul |bn_add |add_output |final_output |
+ * |:--------------|:--------------|:--------------|:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |F32 |F32 |
+ *
+ * This is what this composite function (tailored for add followed by a batch norm operation) does:
+ * add_output <- input1 + input2 (add)
+ * final_output <- add_output * bn_mul + bn_add (batch norm = mul+add)
+ *
+ * @param[in] input1 First tensor input. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[in] bn_mul The multiplication coefficient on the feature dimension. Data types supported: Same as @p input1.
+ * It's a one-dimensional tensor with size equal to the number of feature maps [FM]
+ * @param[in] bn_add The addition coefficient on the feature dimension. Data types supported: Same as @p input1.
+ * It's a one-dimensional tensor with size equal to the number of feature maps [FM]
+ * @param[out] add_output Output of the first add. Data type supported: Same as @p input1.
+ * @param[out] final_output Output of the add+mul+add+act composite operation. Data type supported: Same as @p input1.
+ * @param[in] policy Policy to handle overflow
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ */
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *bn_mul,
+ ITensor *bn_add,
+ ITensor *add_output,
+ ITensor *final_output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEAddMulAdd
+ *
+ * Similar to @ref NEAddMulAdd::configure() except the arguments are @ref ITensorInfo * instead of @ref ITensor *
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *bn_mul,
+ const ITensorInfo *bn_add,
+ const ITensorInfo *add_output,
+ const ITensorInfo *final_output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD */
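A sketch of how the two-output interface of this new function might be driven (not part of the patch; the shapes and the 32-feature-map size are invented for illustration). Note that act_info has no default argument in configure(), so a default-constructed ActivationLayerInfo is passed to mean "no fused activation":

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const TensorShape shape(8U, 4U, 32U); // W x H x FM, illustrative
    const TensorInfo  info(shape, 1, DataType::F32);
    Tensor input1, input2, add_output, final_output;
    input1.allocator()->init(info);
    input2.allocator()->init(info);
    add_output.allocator()->init(info);
    final_output.allocator()->init(info);

    // bn_mul / bn_add are 1-D with one coefficient per feature map, as documented above.
    Tensor bn_mul, bn_add;
    bn_mul.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
    bn_add.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

    NEAddMulAdd fused;
    fused.configure(&input1, &input2, &bn_mul, &bn_add, &add_output, &final_output,
                    ConvertPolicy::SATURATE, ActivationLayerInfo());

    input1.allocator()->allocate();
    input2.allocator()->allocate();
    bn_mul.allocator()->allocate();
    bn_add.allocator()->allocate();
    add_output.allocator()->allocate();
    final_output.allocator()->allocate();
    fused.run(); // add_output = input1 + input2; final_output = add_output * bn_mul + bn_add
    return 0;
}
```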
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 4b13d1f44e..3bb50a0f90 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_NEARGMINMAXLAYER_H
#define ARM_COMPUTE_NEARGMINMAXLAYER_H
-#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
@@ -33,11 +31,10 @@
namespace arm_compute
{
class ITensor;
-
/** Function to calculate the index of the minimum or maximum values in a
* tensor based on an axis.
*
- * This function calls the following NEON kernels:
+ * This function calls the following kernels:
*
* -# @ref NEReductionOperationKernel
* -# @ref NEFillBorderKernel
@@ -64,6 +61,18 @@ public:
~NEArgMinMaxLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-------------|
+ * |QASYMM8 |U32, S32 |
+ * |QASYMM8_SIGNED |U32, S32 |
+ * |S32 |U32, S32, S64 |
+ * |F16 |U32, S32 |
+ * |F32 |U32, S32 |
+ *
* @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
* @param[out] output Output source tensor. Data types supported: U32/S32.
@@ -74,7 +83,7 @@ public:
*
* @param[in] input Input source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
- * @param[in] output Output source tensor info. Data types supported: U32/S32.
+ * @param[in] output Output source tensor info. Data types supported: U32/S32/S64.
* @param[in] op Operation to perform: min or max
*
* @return a status
@@ -85,7 +94,8 @@ public:
void run() override;
private:
- std::unique_ptr<NEReductionOperation> _reduction_function;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEARGMINMAXLAYER_H */
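A sketch of the validate-then-configure idiom with this interface (not part of the patch). It assumes, per the library's usual behaviour, that the reduced axis is removed from the output shape; the shapes are illustrative:

```cpp
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // 10x4 F32 input reduced along axis 0; U32 indices, matching the F32 -> U32 row above.
    Tensor src, indices;
    src.allocator()->init(TensorInfo(TensorShape(10U, 4U), 1, DataType::F32));
    indices.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::U32));

    // validate() mirrors configure() but takes ITensorInfo*, so it can run before allocation.
    const Status st = NEArgMinMaxLayer::validate(src.info(), 0, indices.info(),
                                                 ReductionOperation::ARG_IDX_MAX);
    if (st.error_code() == ErrorCode::OK)
    {
        NEArgMinMaxLayer argmax;
        argmax.configure(&src, 0, &indices, ReductionOperation::ARG_IDX_MAX);
        src.allocator()->allocate();
        indices.allocator()->allocate();
        argmax.run();
    }
    return 0;
}
```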
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 6aaa5ff4f7..73a43dbc44 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,69 +25,17 @@
#define ARM_COMPUTE_NEARITHMETICADDITION_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/INEOperator.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-namespace experimental
-{
-/** Basic function to run @ref NEArithmeticAdditionKernel */
-class NEArithmeticAddition : public INEOperator
-{
-public:
- /** Constructor */
- NEArithmeticAddition() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticAddition(const NEArithmeticAddition &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEArithmeticAddition(NEArithmeticAddition &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEArithmeticAddition &operator=(NEArithmeticAddition &&) = delete;
- /** Default destructor */
- ~NEArithmeticAddition();
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- *
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
- *
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] policy Policy to use to handle overflow
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace experimental
-
-/** Basic function to run @ref NEArithmeticAdditionKernel */
+/** Basic function to run @ref cpu::kernels::CpuAddKernel */
class NEArithmeticAddition : public IFunction
{
public:
@@ -105,19 +53,21 @@ public:
NEArithmeticAddition &operator=(NEArithmeticAddition &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
+ * Valid data layouts:
+ * - All
*
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
*
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
@@ -125,7 +75,11 @@ public:
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
*
* @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
@@ -136,7 +90,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -146,4 +104,4 @@ private:
std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEARITHMETICADDITION_H */
+#endif /* ARM_COMPUTE_NEARITHMETICADDITION_H */
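A minimal sketch of the retained run-time interface (not part of the patch), using the U8 row from the new table; SATURATE clamps on overflow rather than wrapping:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const TensorInfo info(TensorShape(32U, 32U), 1, DataType::U8); // U8 + U8 -> U8 row above
    Tensor a, b, sum;
    a.allocator()->init(info);
    b.allocator()->init(info);
    sum.allocator()->init(info);

    NEArithmeticAddition add;
    add.configure(&a, &b, &sum, ConvertPolicy::SATURATE); // no fused activation (default)

    a.allocator()->allocate();
    b.allocator()->allocate();
    sum.allocator()->allocate();
    add.run();
    return 0;
}
```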
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index 5d2475b3a4..3e4f6356c5 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INEOperator.h"
@@ -32,75 +33,13 @@ namespace arm_compute
{
class ITensor;
-namespace experimental
-{
-/** Basic function to run @ref NEArithmeticSubtractionKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function performs an arithmetic subtraction between two tensors.
- *
- * This function calls the following kernels:
- * -# @ref NEArithmeticSubtractionKernel
- */
-class NEArithmeticSubtraction : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (QASYMM8, QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- *
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (QASYMM8, QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- *
- * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
- * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace experimental
-
-/** Basic function to run @ref NEArithmeticSubtractionKernel
+/** Basic function to run @ref cpu::kernels::CpuSubKernel
*
* @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
* @note The function performs an arithmetic subtraction between two tensors.
*
* This function calls the following kernels:
- * -# @ref NEArithmeticSubtractionKernel
+ * -# @ref cpu::kernels::CpuSubKernel
*/
class NEArithmeticSubtraction : public IFunction
{
@@ -119,13 +58,33 @@ public:
NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
* @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
* @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
* @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
*
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
@@ -136,7 +95,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
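A quantized counterpart for the subtraction interface (not part of the patch); the QASYMM8 scale and offset are invented for illustration, and SATURATE is used because the documentation above forbids WRAP for quantized types:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // QASYMM8 in/out, per the table above; quantization parameters are illustrative.
    const TensorInfo qinfo(TensorShape(64U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 10));
    Tensor a, b, diff;
    a.allocator()->init(qinfo);
    b.allocator()->init(qinfo);
    diff.allocator()->init(qinfo);

    NEArithmeticSubtraction sub;
    sub.configure(&a, &b, &diff, ConvertPolicy::SATURATE); // WRAP is invalid for quantized data

    a.allocator()->allocate();
    b.allocator()->allocate();
    diff.allocator()->allocate();
    sub.run();
    return 0;
}
```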
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index 6d56a267a7..99e2dcadbb 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,6 +58,16 @@ public:
~NEBatchNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
@@ -71,7 +81,13 @@ public:
* @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
*/
- void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f,
+ void configure(ITensor *input,
+ ITensor *output,
+ const ITensor *mean,
+ const ITensor *var,
+ const ITensor *beta = nullptr,
+ const ITensor *gamma = nullptr,
+ float epsilon = 0.001f,
ActivationLayerInfo act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayer
*
@@ -88,10 +104,14 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *mean,
+ const ITensorInfo *var,
+ const ITensorInfo *beta = nullptr,
+ const ITensorInfo *gamma = nullptr,
+ float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -99,5 +119,5 @@ public:
private:
std::unique_ptr<NEBatchNormalizationLayerKernel> _norm_kernel; /**< Batch normalization layer kernel */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */
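A sketch of the reflowed configure() overload (not part of the patch); the shapes are illustrative, beta and gamma are left at their nullptr defaults, and RELU is one of the three activations the documentation above allows to be fused:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // W x H x C activation tensor; per-channel statistics are 1-D of size C.
    const TensorInfo plane_info(TensorShape(8U, 8U, 16U), 1, DataType::F32);
    Tensor src, dst, mean, var;
    src.allocator()->init(plane_info);
    dst.allocator()->init(plane_info);
    mean.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    var.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

    NEBatchNormalizationLayer bn;
    bn.configure(&src, &dst, &mean, &var, nullptr, nullptr, 0.001f,
                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    mean.allocator()->allocate();
    var.allocator()->allocate();
    bn.run();
    return 0;
}
```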
diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
index c2fd26d34c..ebed0bea29 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYER_H
#define ARM_COMPUTE_NEBATCHTOSPACELAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
@@ -52,10 +51,22 @@ public:
~NEBatchToSpaceLayer() = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:---------|:---------|:----------|
+ * |All |S32 |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
* @param[out] output Tensor output. Data types supported: same as @p input
+ *
+ * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release
*/
+ ARM_COMPUTE_DEPRECATED_REL(23.05)
void configure(const ITensor *input, const ITensor *block_shape, ITensor *output);
/** Set the input and output tensors. (Static block shape).
*
@@ -63,8 +74,13 @@ public:
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
* @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*/
- void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output);
+ void configure(const ITensor *input,
+ int32_t block_shape_x,
+ int32_t block_shape_y,
+ ITensor *output,
+ const CropInfo &crop_info = CropInfo{});
/** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayer
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
@@ -72,7 +88,9 @@ public:
* @param[out] output Tensor output info. Data types supported: same as @p input
*
* @return a status
+ * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release
*/
+ ARM_COMPUTE_DEPRECATED_REL(23.05)
static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayer (Static block shape).
*
@@ -80,10 +98,15 @@ public:
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
* @param[out] output Tensor output info. Data types supported: same as @p input
+ * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, int32_t block_shape_x, int32_t block_shape_y, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ int32_t block_shape_x,
+ int32_t block_shape_y,
+ const ITensorInfo *output,
+ const CropInfo &crop_info = CropInfo{});
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYER_H */
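A sketch of the static-block-shape overload with the new CropInfo parameter (not part of the patch). With a 2x2 block, a batch of four 4x4x3 images folds into one 8x8x3 image; the shapes below follow that arithmetic:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Rank-4 input (W, H, C, N): a 2x2 block moves the batch of 4 into an 8x8 spatial grid.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U, 4U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U, 1U), 1, DataType::F32));

    NEBatchToSpaceLayer b2s;
    b2s.configure(&src, 2, 2, &dst, CropInfo{}); // default CropInfo: no cropping

    src.allocator()->allocate();
    dst.allocator()->allocate();
    b2s.run();
    return 0;
}
```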
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
index 3203d2b9a7..1f95f193d3 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,14 @@ public:
~NEBitwiseAnd() = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
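The four bitwise functions touched by this patch share the same U8-only contract, so one sketch covers the pattern (not part of the patch; NEBitwiseNot takes a single input instead of two):

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const TensorInfo info(TensorShape(256U), 1, DataType::U8); // U8 is the only supported type
    Tensor a, b, out;
    a.allocator()->init(info);
    b.allocator()->init(info);
    out.allocator()->init(info);

    NEBitwiseAnd band;
    band.configure(&a, &b, &out);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    band.run();
    return 0;
}
```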
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
index 9fa0d38caf..c66bebf7cc 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseNot : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's input and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input Input tensor. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
index fba6b784de..183df212e4 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseOr : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
index c6cb584284..126aaa6ddd 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseXor : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
index de8dfef4ed..aa41fc0df2 100644
--- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
+++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,16 +32,23 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
-/** Basic function to run @ref NEBoundingBoxTransformKernel.
- *
- * This function calls the following Neon kernels:
- * -# @ref NEBoundingBoxTransformKernel
- */
+/** Basic function to run @ref NEBoundingBoxTransformKernel. */
class NEBoundingBoxTransform : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM16 |QASYMM8 |QASYMM16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
* @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
* @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
@@ -50,7 +57,8 @@ public:
*
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
*/
- void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
+ void
+ configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransform
*
@@ -64,7 +72,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
+ static Status validate(const ITensorInfo *boxes,
+ const ITensorInfo *pred_boxes,
+ const ITensorInfo *deltas,
+ const BoundingBoxTransformInfo &info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBOUNDINGBOXTRANSFORM_H */
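A sketch of the transform call (not part of the patch). The tensor layout follows the Size(M, 4) / Size(M, 4*K) description above with ACL's x-first shape order, and the BoundingBoxTransformInfo constructor arguments (image width, image height, scale) are recalled from Types.h rather than shown in this diff, so treat them as an assumption:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    constexpr unsigned int M = 16; // number of proposals (illustrative)
    constexpr unsigned int K = 2;  // number of classes (illustrative)
    Tensor boxes, deltas, pred_boxes;
    boxes.allocator()->init(TensorInfo(TensorShape(4U, M), 1, DataType::F32));
    deltas.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));
    pred_boxes.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));

    NEBoundingBoxTransform bbox;
    // Assumed constructor order: image width, image height, scale.
    bbox.configure(&boxes, &pred_boxes, &deltas, BoundingBoxTransformInfo(800.f, 600.f, 1.f));

    boxes.allocator()->allocate();
    deltas.allocator()->allocate();
    pred_boxes.allocator()->allocate();
    bbox.run();
    return 0;
}
```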
diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h
deleted file mode 100644
index d65c2be885..0000000000
--- a/arm_compute/runtime/NEON/functions/NEBox3x3.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBOX3x3_H
-#define ARM_COMPUTE_NEBOX3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute box filter 3x3. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEBox3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEBox3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's input, output and border mode.
- *
- * @note The border handler is run on the input tensor.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data type supported: U8.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
-};
-}
-#endif /*ARM_COMPUTE_NEBOX3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h
deleted file mode 100644
index 7cdb8ee38e..0000000000
--- a/arm_compute/runtime/NEON/functions/NECannyEdge.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECANNYEDGE_H
-#define ARM_COMPUTE_NECANNYEDGE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEGradientKernel;
-class NEFillBorderKernel;
-class NEEdgeNonMaxSuppressionKernel;
-class NEEdgeTraceKernel;
-
-/** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT)
- * -# @ref NESobel3x3 (if gradient_size == 3) or
- * @ref NESobel5x5 (if gradient_size == 5) or
- * @ref NESobel7x7 (if gradient_size == 7)
- * -# @ref NEGradientKernel
- * -# @ref NEEdgeNonMaxSuppressionKernel
- * -# @ref NEEdgeTraceKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- *
- */
-class NECannyEdge : public IFunction
-{
-public:
- /** Constructor
- *
- * Initialize Sobel kernel to nullptr.
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECannyEdge(const NECannyEdge &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECannyEdge &operator=(const NECannyEdge &) = delete;
- /** Default destructor */
- ~NECannyEdge();
- /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis.
- * @param[in] lower_thr Lower threshold used for the hysteresis.
- * @param[in] gradient_size Gradient size (3, 5 or 7)
- * @param[in] norm_type Normalization type. If 1, L1-Norm; otherwise L2-Norm.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel */
- std::unique_ptr<NEGradientKernel> _gradient; /**< Gradient kernel */
- std::unique_ptr<NEEdgeNonMaxSuppressionKernel> _non_max_suppr; /**< Non-Maxima suppression kernel */
- std::unique_ptr<NEEdgeTraceKernel> _edge_trace; /**< Edge tracing kernel */
- std::unique_ptr<NEFillBorderKernel> _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
- std::unique_ptr<NEFillBorderKernel> _border_edge_trace; /**< Fill border before edge trace */
- Tensor _gx; /**< Source tensor - Gx component */
- Tensor _gy; /**< Source tensor - Gy component */
- Tensor _magnitude; /**< Source tensor - Magnitude */
- Tensor _phase; /**< Source tensor - Phase */
- Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */
- ITensor *_output; /**< Output tensor provided by the user. */
-};
-}
-#endif /* ARM_COMPUTE_NECANNYEDGE_H */
diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h
index e536317660..43cae777f6 100644
--- a/arm_compute/runtime/NEON/functions/NECast.h
+++ b/arm_compute/runtime/NEON/functions/NECast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,33 +25,51 @@
#define ARM_COMPUTE_NECAST_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-/** Basic function to run @ref NEDepthConvertLayerKernel.
+/** Basic function to run @ref cpu::kernels::CpuCastKernel.
 * This function ignores the scale and zeroPoint of quantized tensors, so QASYMM8 input is treated as uint8 values.
*/
-class NECast : public INESimpleFunctionNoBorder
+class NECast : public IFunction
{
public:
+ /** Constructor */
+ NECast();
+ /** Destructor */
+ ~NECast();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECast(const NECast &) = delete;
+ /** Default move constructor */
+ NECast(NECast &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECast &operator=(const NECast &) = delete;
+ /** Default move assignment operator */
+ NECast &operator=(NECast &&);
/** Initialize the function's source, destination
*
- * Input data type must be different than output data type.
+ * Valid data layouts:
+ * - All
*
- * Valid conversions Input -> Output :
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-----------------------------------------------|
+ * |QASYMM8_SIGNED | S16, S32, F32, F16 |
+ * |QASYMM8 | U16, S16, S32, F32, F16 |
+ * |U8 | U16, S16, S32, F32, F16 |
+ * |U16 | U8, U32 |
+ * |S16 | QASYMM8_SIGNED, U8, S32 |
+ * |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 |
+ * |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 |
+ * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8|
*
- * - QASYMM8_SIGNED -> S16, S32, F32, F16
- * - QASYMM8 -> U16, S16, S32, F32, F16
- * - U8 -> U16, S16, S32, F32, F16
- * - U16 -> U8, U32
- * - S16 -> QASYMM8_SIGNED, U8, S32
- * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
- * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
- * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
+ * Input data type must be different from the output data type.
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32.
* @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32.
@@ -66,7 +84,14 @@ public:
*
* @return a status
*/
- static Status validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NECAST_H*/
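Note: with NECast now pimpl-based and IFunction-derived, caller code is unchanged. Below is a minimal sketch (not part of this patch) of one conversion from the table above, U8 -> F32; the 16x16 shape is an illustrative assumption, and ConvertPolicy::SATURATE is simply the policy argument the signature requires.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NECast.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative 16x16 tensors; U8 -> F32 is listed as a valid configuration above.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    // validate() now takes const ITensorInfo pointers, so info() can be passed directly.
    if(NECast::validate(src.info(), dst.info(), ConvertPolicy::SATURATE).error_code() != ErrorCode::OK)
    {
        return 1;
    }

    NECast cast;
    cast.configure(&src, &dst, ConvertPolicy::SATURATE);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    cast.run();
    return 0;
}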
diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h
deleted file mode 100644
index c4ead73343..0000000000
--- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELCOMBINE_H
-#define ARM_COMPUTE_NECHANNELCOMBINE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/**Basic function to run @ref NEChannelCombineKernel to perform channel combination.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEChannelCombine : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8
- * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- */
- void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output);
- /** Initialize function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[out] output The multi planar output image. Formats supported: NV12/NV21/IYUV/YUV444
- */
- void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECHANNELCOMBINE_H*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h
deleted file mode 100644
index 99522d2d74..0000000000
--- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELEXTRACT_H
-#define ARM_COMPUTE_NECHANNELEXTRACT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEChannelExtract : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function's source, destination
- *
- * @param[in] input The input tensor to extract the channel from. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted channel. Format supported: U8
- */
- void configure(const ITensor *input, Channel channel, ITensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted channel. Format supported: U8
- */
- void configure(const IMultiImage *input, Channel channel, IImage *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECHANNELEXTRACT_H*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
index aa11396c20..bc19e1a4af 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,6 +44,15 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - NCHW
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
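Note: a minimal usage sketch for the documentation added above (not part of this patch). The 32x32x8 NCHW shape and num_groups = 2 are illustrative assumptions chosen so that the channel count (8) is a multiple of the group count.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative NCHW tensor: 32x32 spatial, 8 channels.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));

    // num_groups must be greater than 1 and divide the channel count.
    const unsigned int num_groups = 2;
    if(NEChannelShuffleLayer::validate(src.info(), dst.info(), num_groups).error_code() != ErrorCode::OK)
    {
        return 1;
    }

    NEChannelShuffleLayer shuffle;
    shuffle.configure(&src, &dst, num_groups);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    shuffle.run();
    return 0;
}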
diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h
deleted file mode 100644
index 69459a83c1..0000000000
--- a/arm_compute/runtime/NEON/functions/NECol2Im.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOL2IM_H
-#define ARM_COMPUTE_NECOL2IM_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ITensorInfo;
-
-/** Basic function to run @ref NECol2Im */
-class NECol2Im : public INESimpleFunctionNoBorder
-{
-public:
- /** Configure the col2im NEON kernel
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
- /** Static function to check if given info will lead to a valid configuration of @ref NECol2Im
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECOL2IM_H */
diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h
deleted file mode 100644
index 8974aa63a1..0000000000
--- a/arm_compute/runtime/NEON/functions/NEColorConvert.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOLORCONVERT_H
-#define ARM_COMPUTE_NECOLORCONVERT_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-class IMultiImage;
-using IImage = ITensor;
-
-/**Basic function to run @ref NEColorConvertKernel to perform color conversion
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEColorConvert : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function's source, destination
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const ITensor *input, ITensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const IMultiImage *input, IImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
- */
- void configure(const IImage *input, IMultiImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const IMultiImage *input, IMultiImage *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECOLORCONVERT_H*/
diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h
deleted file mode 100644
index b63243fec6..0000000000
--- a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H
-#define ARM_COMPUTE_NECOMPUTEALLANCHORS_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ITensorInfo;
-
-/** Basic function to run @ref NEComputeAllAnchorsKernel.
- *
- * This function calls the following NEON kernels:
- * -# @ref NEComputeAllAnchorsKernel
- */
-class NEComputeAllAnchors : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel
- *
- * @param[in] anchors Source tensor info. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
- * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECOMPUTEALLANCHORS_H */
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index fd35d0bc46..1600f85488 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,10 @@
#ifndef ARM_COMPUTE_NECONCATENATELAYER_H
#define ARM_COMPUTE_NECONCATENATELAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INEOperator.h"
-#include "support/Requires.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
-#include <vector>
namespace arm_compute
{
@@ -40,13 +36,7 @@ class ITensor;
class ITensorInfo;
class Status;
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
- */
+/** Basic function to concatenate tensors along a given axis */
class NEConcatenateLayer : public IFunction
{
public:
@@ -64,8 +54,20 @@ public:
NEConcatenateLayer &operator=(NEConcatenateLayer &&);
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
 * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
+ * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
*
 * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
@@ -75,7 +77,8 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
*
 * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
+ * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
*
 * @param[in] inputs_vector The vector containing all the tensor infos to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
@@ -83,7 +86,8 @@ public:
*
* @return a status
*/
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
+ static Status
+ validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
// Inherited methods overridden:
void run() override;
@@ -92,62 +96,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace experimental
-{
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
- */
-class NEConcatenation : public INEOperator
-{
-public:
- /** Constructor */
- NEConcatenation();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConcatenation(const NEConcatenation &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConcatenation &operator=(const NEConcatenation &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConcatenation(NEConcatenation &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConcatenation &operator=(NEConcatenation &&) = delete;
- /** Default destructor */
- ~NEConcatenation() = default;
- /** Initialise the kernel's inputs vector and output.
- *
- * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
- *
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- */
- void configure(const std::vector<const ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
- *
- * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
- *
- * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- *
- * @return a status
- */
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-
-private:
- std::vector<std::unique_ptr<ICPPKernel>> _concat_kernels;
- unsigned int _num_inputs;
- unsigned int _axis;
-};
-} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONCATENATELAYER_H */
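Note: the experimental::NEConcatenation operator is removed above, but the NEConcatenateLayer front end keeps the same shape. A minimal sketch of its use, under illustrative assumptions (two 8x4 F32 inputs joined along axis 0 into a 16x4 output):

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

int main()
{
    // Two illustrative F32 inputs concatenated along the width axis (axis 0).
    Tensor a, b, out;
    a.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));

    const std::vector<const ITensorInfo *> infos = {a.info(), b.info()};
    if(NEConcatenateLayer::validate(infos, out.info(), 0).error_code() != ErrorCode::OK)
    {
        return 1;
    }

    std::vector<const ITensor *> inputs = {&a, &b};
    NEConcatenateLayer concat;
    concat.configure(inputs, &out, 0 /* axis */);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();

    concat.run();
    return 0;
}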
diff --git a/arm_compute/runtime/NEON/functions/NEConv3D.h b/arm_compute/runtime/NEON/functions/NEConv3D.h
new file mode 100644
index 0000000000..525f37f3e7
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEConv3D.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECONV3D_H
+#define ARM_COMPUTE_NECONV3D_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Basic function to simulate a 3D convolution. This function calls one of the following functions:
+ * -# @ref cpu::CpuDirectConv3d
+ *
+ */
+class NEConv3D : public IFunction
+{
+public:
+ /** Constructor */
+ NEConv3D();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConv3D(const NEConv3D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConv3D &operator=(const NEConv3D &) = delete;
+ /** Default move constructor */
+ NEConv3D(NEConv3D &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConv3D &operator=(NEConv3D &&) = default;
+ /** Default destructor */
+ ~NEConv3D();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NDHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. 4 lower dimensions represent a single input [IFM, width, height, depth],
+ * while every optional dimension from 5 and above represent a batch of inputs.
+ * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [OFM, IFM, kernel_x, kernel_y, kernel_z].
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * @param[out] output Destination tensor. 4 lower dimensions represent a single output [OFM, width, height, depth], while the rest represent batch of outputs.
+ * @param[in] conv_info Contains padding, stride and activation information described in @ref Conv3dInfo.
+ */
+ void configure(
+ ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv3dInfo &conv_info);
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to NEConv3D::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const Conv3dInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NECONV3D_H */
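Note: since NEConv3D is new in this patch, a usage sketch may help. Everything in it is an assumption for illustration: NDHWC shapes with IFM = 8, a 16x16x16 volume, OFM = 4, a 3x3x3 kernel, and a default-constructed Conv3dInfo, which we assume means unit strides, zero padding and no fused activation per @ref FunctionDescriptors.h; with those defaults each spatial dimension shrinks by 2.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/NEON/functions/NEConv3D.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Helper: F32 TensorInfo tagged with the NDHWC layout the layer requires.
    auto make_info = [](const TensorShape &shape)
    {
        TensorInfo info(shape, 1, DataType::F32);
        info.set_data_layout(DataLayout::NDHWC);
        return info;
    };

    Tensor src, weights, biases, dst;
    src.allocator()->init(make_info(TensorShape(8U, 16U, 16U, 16U)));      // [IFM, W, H, D]
    weights.allocator()->init(make_info(TensorShape(4U, 8U, 3U, 3U, 3U))); // [OFM, IFM, kx, ky, kz]
    biases.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32)); // [OFM]
    dst.allocator()->init(make_info(TensorShape(4U, 14U, 14U, 14U)));      // [OFM, W-2, H-2, D-2]

    const Conv3dInfo conv_info{}; // assumed defaults: unit strides, no padding, no fused activation

    if(NEConv3D::validate(src.info(), weights.info(), biases.info(), dst.info(), conv_info).error_code() != ErrorCode::OK)
    {
        return 1;
    }

    NEConv3D conv;
    conv.configure(&src, &weights, &biases, &dst, conv_info);

    for(Tensor *t : {&src, &weights, &biases, &dst})
    {
        t->allocator()->allocate();
    }

    conv.run();
    return 0;
}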
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index 984e8d68c0..dc6b22d717 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,19 +24,16 @@
#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/ITransformWeights.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/Tensor.h"
-#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
-class NEConvertFullyConnectedWeightsKernel;
+class ITensorInfo;
-/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */
+/** Basic function to run @ref cpu::kernels::CpuConvertFullyConnectedWeightsKernel. */
class NEConvertFullyConnectedWeights : public IFunction
{
public:
@@ -54,12 +51,22 @@ public:
~NEConvertFullyConnectedWeights();
/** Initialize the function.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
*/
- void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ void
+ configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights
*
* @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
@@ -69,53 +76,17 @@ public:
*
* @return A Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const TensorShape &original_input_shape,
+ DataLayout data_layout);
// Inherited methods overridden:
void run() override;
private:
- std::unique_ptr<NEConvertFullyConnectedWeightsKernel> _kernel;
-};
-
-namespace weights_transformations
-{
-/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */
-class NEConvertFullyConnectedWeightsManaged : public ITransformWeights
-{
-public:
- void run() override
- {
- _output.allocator()->allocate();
- _func.run();
- _reshape_run = true;
- }
-
- void release() override
- {
- _output.allocator()->free();
- }
-
- ITensor *get_weights() override
- {
- return &_output;
- }
-
- uint32_t uid() override
- {
- return _uid;
- }
-
- void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
- {
- _func.configure(input, &_output, original_input_shape, data_layout);
- }
-
-private:
- static constexpr uint32_t _uid = 0x4;
- Tensor _output{};
- NEConvertFullyConnectedWeights _func{};
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-} // namespace weights_transformations
} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H */
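Note: with the managed weights-transformation helper removed above, the conversion is driven through the function itself. A minimal sketch; the [48, 10] weight shape (a flattened 4x4x3 feature map feeding 10 units) and the NCHW source layout are illustrative assumptions.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative 2D weights: 4*4*3 = 48 flattened inputs feeding 10 units.
    Tensor weights, converted;
    weights.allocator()->init(TensorInfo(TensorShape(48U, 10U), 1, DataType::F32));
    converted.allocator()->init(TensorInfo(TensorShape(48U, 10U), 1, DataType::F32));

    // Shape of the tensor that originally entered the fully connected layer.
    const TensorShape original_input_shape(4U, 4U, 3U);

    // The weights were trained in NCHW; the conversion re-orders them for NHWC execution.
    if(NEConvertFullyConnectedWeights::validate(weights.info(), converted.info(), original_input_shape,
                                                DataLayout::NCHW).error_code() != ErrorCode::OK)
    {
        return 1;
    }

    NEConvertFullyConnectedWeights convert;
    convert.configure(&weights, &converted, original_input_shape, DataLayout::NCHW);

    weights.allocator()->allocate();
    converted.allocator()->allocate();

    convert.run();
    return 0;
}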
diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h
deleted file mode 100644
index afd654a595..0000000000
--- a/arm_compute/runtime/NEON/functions/NEConvolution.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVOLUTION_H
-#define ARM_COMPUTE_NECONVOLUTION_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEFillBorderKernel;
-template <unsigned int matrix_size>
-class NEConvolutionKernel;
-template <unsigned int matrix_size>
-class NESeparableConvolutionHorKernel;
-template <unsigned int matrix_size>
-class NESeparableConvolutionVertKernel;
-
-/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolution3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEConvolution3x3 : public INESimpleFunction
-{
-public:
- /** Constructor */
- NEConvolution3x3() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolution3x3(const NEConvolution3x3 &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolution3x3 &operator=(const NEConvolution3x3 &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolution3x3(NEConvolution3x3 &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolution3x3 &operator=(NEConvolution3x3 &&) = delete;
- /** Default destructor */
- ~NEConvolution3x3();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8/S16.
- * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-
-/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolutionKernel or<br/>
- * @ref NESeparableConvolutionHorKernel and @ref NESeparableConvolutionVertKernel (if convolution matrix is separable)
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-template <unsigned int matrix_size>
-class NEConvolutionSquare : public IFunction
-{
-public:
- /** Default constructor */
- NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionSquare(const NEConvolutionSquare &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionSquare &operator=(const NEConvolutionSquare &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionSquare(NEConvolutionSquare &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionSquare &operator=(NEConvolutionSquare &&) = delete;
- /** Default destructor */
- ~NEConvolutionSquare();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function memory group */
- Tensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- std::unique_ptr<NESeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- std::unique_ptr<NESeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- std::unique_ptr<NEConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/
- std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel for border handling */
-};
-
-/** Basic function to run 5x5 convolution. */
-using NEConvolution5x5 = NEConvolutionSquare<5>;
-/** Basic function to run 7x7 convolution. */
-using NEConvolution7x7 = NEConvolutionSquare<7>;
-/** Basic function to run 9x9 convolution. */
-using NEConvolution9x9 = NEConvolutionSquare<9>;
-
-/** Basic function to execute non-square convolution. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolutionRectangleKernel or<br/>
- *
- * @note Convolution rectangle should have dimensions of 3, 5, 7, 9
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEConvolutionRectangle : public INESimpleFunction
-{
-public:
- /** Constructor */
- NEConvolutionRectangle() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangle(const NEConvolutionRectangle &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangle &operator=(const NEConvolutionRectangle &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionRectangle(NEConvolutionRectangle &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionRectangle &operator=(NEConvolutionRectangle &&) = delete;
- /** Default destructor */
- ~NEConvolutionRectangle();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] rows Rows of convolution kernel.
- * @param[in] cols Columns of convolution kernel.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NECONVOLUTION_H */
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index a061dc7b04..2d07980ade 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,13 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NECONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NECONVOLUTIONLAYER_H
-
-#include "arm_compute/runtime/IFunction.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include <memory>
@@ -37,10 +37,10 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Basic function to simulate a convolution layer. This function calls one of the following NEON functions:
- * -# @ref NEGEMMConvolutionLayer (executed only in case GEMM is required for the operation)
- * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
- * -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation)
+/** Basic function to simulate a convolution layer. This function calls one of the following functions:
+ * -# @ref cpu::CpuGemmConv2d (executed only in case GEMM is required for the operation)
+ * -# @ref cpu::CpuWinogradConv2d (executed only in case Winograd is required for the operation)
+ * -# @ref cpu::CpuDirectConv2d (executed only in case Direct Convolution is required for the operation)
* -# @ref NEFFTConvolutionLayer (executed only in case FFT is required for the operation)
*
*
@@ -78,46 +78,70 @@ public:
NEConvolutionLayer(const NEConvolutionLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEConvolutionLayer(NEConvolutionLayer &&) = default;
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayer(NEConvolutionLayer &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayer &operator=(NEConvolutionLayer &&) = delete;
+ NEConvolutionLayer &operator=(NEConvolutionLayer &&) = default;
/** Default destructor */
- ~NEConvolutionLayer() = default;
+ ~NEConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input; QSYMM8_PER_CHANNEL is also supported if @p input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
 * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
 * available, which may introduce a drop in accuracy. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input; QSYMM8_PER_CHANNEL is also supported if @p input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
@@ -126,20 +150,28 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
- unsigned int num_groups = 1);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
@@ -147,15 +179,21 @@ public:
*
* @return the Convolution Method Hint
*/
- static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ static ConvolutionMethod get_convolution_method(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- std::shared_ptr<IMemoryManager> _memory_manager;
- std::unique_ptr<IFunction> _function; /**< Function to run */
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */
\ No newline at end of file
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
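[Editor's example] A minimal usage sketch for the validate()/configure()/run() sequence declared above. This is not part of the patch; the shapes, padding values and header paths are assumptions for illustration only.

#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void convolve_once()
{
    // Illustrative shapes: 32x32x3 input, sixteen 3x3 kernels, "same" padding.
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

    const PadStrideInfo conv_info(1U, 1U, 1U, 1U); // stride 1, pad 1

    // validate() takes ITensorInfo pointers, so it can run before allocation.
    ARM_COMPUTE_ERROR_THROW_ON(
        NEConvolutionLayer::validate(src.info(), weights.info(), bias.info(), dst.info(), conv_info));

    NEConvolutionLayer conv;
    conv.configure(&src, &weights, &bias, &dst, conv_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    conv.run(); // the first run() also triggers prepare()
}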
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index a58ac9e620..840c03e968 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,31 +25,41 @@
#define ARM_COMPUTE_NECOPY_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-/** Basic function to run @ref NECopyKernel */
-class NECopy : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuCopyKernel */
+class NECopy : public IFunction
{
public:
- /** Constructor */
- NECopy() = default;
+ /** Default Constructor */
+ NECopy();
+ /** Default Destructor */
+ ~NECopy();
/** Prevent instances of this class from being copied (As this class contains pointers) */
NECopy(const NECopy &) = delete;
+ /** Default move constructor */
+ NECopy(NECopy &&);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NECopy &operator=(const NECopy &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NECopy(NECopy &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NECopy &operator=(NECopy &&) = delete;
- /** Default destructor */
- ~NECopy();
+ /** Default move assignment operator */
+ NECopy &operator=(NECopy &&);
/** Initialise the function's source and destination.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
@@ -63,6 +73,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NECOPY_H */
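[Editor's example] The switch to struct Impl / std::unique_ptr<Impl> is the pImpl idiom: the constructor, destructor and move operations must be defined out of line because a unique_ptr to an incomplete type cannot be destroyed in the header, and this is also what makes the class movable but not copyable. A hypothetical sketch of the matching NECopy.cpp, assuming the operator-based pattern implied by the CpuCopyKernel reference; the internal header path and member names are guesses.

#include "arm_compute/runtime/NEON/functions/NECopy.h"

#include "arm_compute/core/ITensorPack.h"
#include "src/cpu/operators/CpuCopy.h" // internal header, path assumed

namespace arm_compute
{
struct NECopy::Impl
{
    const ITensor                *src{nullptr};
    ITensor                      *dst{nullptr};
    std::unique_ptr<cpu::CpuCopy> op{nullptr};
};

NECopy::NECopy() : _impl(std::make_unique<Impl>())
{
}
NECopy::~NECopy()                    = default; // Impl is complete here
NECopy::NECopy(NECopy &&)            = default;
NECopy &NECopy::operator=(NECopy &&) = default;

void NECopy::configure(ITensor *input, ITensor *output)
{
    _impl->src = input;
    _impl->dst = output;
    _impl->op  = std::make_unique<cpu::CpuCopy>();
    _impl->op->configure(input->info(), output->info());
}

void NECopy::run()
{
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC, _impl->src);
    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
    _impl->op->run(pack);
}
} // namespace arm_compute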
diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h
index 5c3733f8ee..f806762158 100644
--- a/arm_compute/runtime/NEON/functions/NECropResize.h
+++ b/arm_compute/runtime/NEON/functions/NECropResize.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,7 @@
namespace arm_compute
{
// Forward Declarations
+class Tensor;
class ITensor;
class NECropKernel;
@@ -53,6 +54,14 @@ public:
/** Configure kernel
*
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------|:--------|:------|:--------|
+ * |All |F32 |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
* @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
* @note Start and end indices of boxes are inclusive.
@@ -66,8 +75,13 @@ public:
* @param[in] method The policy to be used when resizing image. Default is bilinear.
* @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0.
*/
- void configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size,
- InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
+ void configure(const ITensor *input,
+ const ITensor *boxes,
+ const ITensor *box_ind,
+ ITensor *output,
+ Coordinates2D crop_size,
+ InterpolationPolicy method = InterpolationPolicy::BILINEAR,
+ float extrapolation_value = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NESlice
*
@@ -87,8 +101,13 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output,
- Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *boxes,
+ const ITensorInfo *box_ind,
+ const ITensorInfo *output,
+ Coordinates2D crop_size,
+ InterpolationPolicy method,
+ float extrapolation_value);
void run() override;
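[Editor's example] A minimal call sketch for the configure() overload above, assuming input, boxes, box_ind and output are already initialised NHWC tensors; the 64x64 crop size and bilinear policy are illustrative, not from the patch.

#include "arm_compute/runtime/NEON/functions/NECropResize.h"

using namespace arm_compute;

void crop_and_resize(ITensor *input, ITensor *boxes, ITensor *box_ind, ITensor *output)
{
    NECropResize crop_resize;
    crop_resize.configure(input,                         // src0: any data type, NHWC
                          boxes,                         // src1: F32 box coordinates
                          box_ind,                       // src2: F32 batch index per box
                          output,                        // dst : F32
                          Coordinates2D{64, 64},         // target crop_size
                          InterpolationPolicy::BILINEAR, // resize policy
                          0.0f);                         // extrapolation_value
    crop_resize.run();
}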
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 97b1a47f64..aabe42f928 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,15 +24,14 @@
#ifndef ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
-#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
-#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -64,11 +63,10 @@ namespace arm_compute
* The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
* reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
*
- * This function calls the following NEON kernels/functions:
+ * This function calls the following kernels/functions:
*
* -# @ref CPPUpsample
* -# @ref NEConvolutionLayer
- * -# @ref NEPermute
* -# @ref NEReverse
*
*/
@@ -77,39 +75,77 @@ class NEDeconvolutionLayer : public IFunction
public:
/** Constructor */
NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NEDeconvolutionLayer(NEDeconvolutionLayer &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = delete;
+ /** Default move assignment operator */
+ NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default;
/** Default destructor */
- virtual ~NEDeconvolutionLayer() = default;
+ ~NEDeconvolutionLayer() = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension.
+ * Data type supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+ * available, which can also introduce a drop in accuracy. Defaults to false
+ * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. Use this parameter to request the weight format that is optimal for
+ * the GEMM convolution.
*
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *bias,
+ ITensor *output,
+ const PadStrideInfo &info,
+ bool enable_fast_math = false,
+ const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+ * available, which can also introduce a drop in accuracy. Defaults to false
+ * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. Use this parameter to request the weight format that is optimal for
+ * the GEMM convolution.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const PadStrideInfo &info,
+ bool enable_fast_math = false,
+ const WeightsInfo &weights_info = WeightsInfo());
// Inherited methods overridden:
void run() override;
@@ -127,6 +163,7 @@ private:
ITensor *_input;
PadStrideInfo _info;
bool _is_prepared;
+ bool _do_upsampling;
};
-} // arm_compute
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */
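[Editor's example] A sketch of the extended interface; because the two new parameters are trailing defaults, existing call sites stay source-compatible. Tensors and values are assumed for illustration.

#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"

using namespace arm_compute;

void deconvolve(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output)
{
    NEDeconvolutionLayer deconv;
    deconv.configure(input, weights, bias, output,
                     PadStrideInfo(2U, 2U, 1U, 1U), // stride-2 upsampling deconvolution
                     /* enable_fast_math */ true,
                     WeightsInfo());                // unspecified weight format
    deconv.run();
}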
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index c9817a63c1..7bfdfbd13d 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,38 +25,48 @@
#define ARM_COMPUTE_NEDEPTHCONVERT_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
-#include <cstdint>
+#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-/**Basic function to run @ref NEDepthConvertLayerKernel */
-class NEDepthConvertLayer : public INESimpleFunctionNoBorder
+/**Basic function to run @ref cpu::kernels::CpuCastKernel */
+class NEDepthConvertLayer : public IFunction
{
public:
- /* Contructor */
- NEDepthConvertLayer() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ /** Constructor */
+ NEDepthConvertLayer();
+ /** Destructor */
+ ~NEDepthConvertLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
NEDepthConvertLayer(const NEDepthConvertLayer &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete;
- /** Default destructor */
- ~NEDepthConvertLayer() = default;
+ /** Default move constructor */
+ NEDepthConvertLayer(NEDepthConvertLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete;
+ /** Default move assignment operator */
+ NEDepthConvertLayer &operator=(NEDepthConvertLayer &&);
/** Initialize the function's source, destination
*
- * Valid conversions Input -> Output :
+ * Valid data layouts:
+ * - All
*
- * - QASYMM8 -> F16, F32
- * - U8 -> U16, S16, S32
- * - U16 -> U8, U32
- * - S16 -> U8, S32
- * - BFLOAT16 -> F32
- * - F16 -> QASYMM8, F32
- * - F32 -> QASYMM8, F16, BFLOAT16
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------------------|
+ * |QASYMM8 | F16, F32 |
+ * |U8 | U16, S16, S32 |
+ * |U16 | U8, U32 |
+ * |S16 | U8, S32 |
+ * |BFLOAT16 | F32 |
+ * |F16 | QASYMM8, F32 |
+ * |F32 | QASYMM8, F16, BFLOAT16 |
+ *
+ * The input data type must be different from the output data type.
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
* @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
@@ -73,7 +83,15 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEDEPTHCONVERT_H*/
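[Editor's example] An illustrative conversion using one of the pairs listed above (U8 to S32) with saturation; shapes are assumed, not taken from the patch.

#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void convert_depth()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::S32));

    NEDepthConvertLayer convert;
    convert.configure(&src, &dst, ConvertPolicy::SATURATE, /* shift */ 0);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    convert.run();
}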
diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
index 51f7ff7770..d27369670e 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,26 +21,27 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYER_H
-#define ARM_COMPUTE_NEDEPTHTOSPACELAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
class ITensorInfo;
+class NEDepthToSpaceLayerKernel;
/** Basic function to run @ref NEDepthToSpaceLayerKernel. */
-class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder
+class NEDepthToSpaceLayer : public IFunction
{
public:
/** Constructor */
- NEDepthToSpaceLayer() = default;
+ NEDepthToSpaceLayer();
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -50,9 +51,18 @@ public:
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete;
/** Default destructor */
- ~NEDepthToSpaceLayer() = default;
+ ~NEDepthToSpaceLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
@@ -67,6 +77,11 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ void run() override;
+
+private:
+ std::unique_ptr<NEDepthToSpaceLayerKernel> _kernel;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
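[Editor's example] An illustrative depth-to-space rearrangement: with block_shape = 2, a [W, H, C] = [8, 8, 16] tensor becomes [16, 16, 4], since each 2x2 block of channels is moved into the spatial plane. Shapes are assumed.

#include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void depth_to_space()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 4U), 1, DataType::F32));

    NEDepthToSpaceLayer d2s;
    d2s.configure(&src, &dst, /* block_shape */ 2);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    d2s.run();
}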
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index dc70aec7ff..6ad5aa7bfa 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,11 @@
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
+
#include <memory>
namespace arm_compute
@@ -54,6 +56,20 @@ public:
~NEDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
* @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
@@ -65,8 +81,14 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
*
@@ -83,40 +105,27 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a Depthwise Convolution Function
- */
- static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
-
- /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
+ /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
*
* @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
*
* -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
* -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
- * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
+ * -# @ref cpu::CpuDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
* -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
* -# @ref NEActivationLayer if fused activation is required
*
@@ -131,9 +140,11 @@ private:
/** Default move constructor */
NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
+ NEDepthwiseConvolutionLayerOptimizedInternal &
+ operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
/** Default move assignment operator */
- NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
+ NEDepthwiseConvolutionLayerOptimizedInternal &
+ operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
/** Default destructor */
~NEDepthwiseConvolutionLayerOptimizedInternal() = default;
/** Initialize the function's source, destination, kernels and border_size.
@@ -148,8 +159,14 @@ private:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
*
@@ -165,34 +182,26 @@ private:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- const ITensor *_original_weights;
- bool _has_bias;
- bool _is_quantized;
- bool _is_nchw;
- bool _permute;
- bool _is_activationlayer_enabled;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
- /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
+ /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
*
* -# @ref NEDepthwiseConvolutionLayerNativeKernel
*
@@ -225,8 +234,14 @@ private:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
*
@@ -243,31 +258,25 @@ private:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
- void prepare() override;
private:
- std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> _depthwise_conv_kernel;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_nchw;
- bool _is_activationlayer_enabled;
- const ITensor *_original_weights;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-
- DepthwiseConvolutionFunction _depth_conv_func;
- NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
- NEDepthwiseConvolutionLayerGeneric _func_generic;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
\ No newline at end of file
+#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
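[Editor's example] An illustrative depthwise configuration matching the table above: one 3x3 filter per input channel (depth_multiplier = 1) with a fused ReLU. Tensors are assumed to be initialised and allocated by the caller; values are not from the patch.

#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

using namespace arm_compute;

void depthwise(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output)
{
    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(input, weights, biases, output,
                  PadStrideInfo(1U, 1U, 1U, 1U), // stride 1, pad 1
                  1U,                            // depth_multiplier
                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                  Size2D(1U, 1U));               // no dilation
    dwc.run();
}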
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index f52d709c74..8b49930ef5 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,10 @@
#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H
#define ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
@@ -34,12 +35,36 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
-/** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */
-class NEDequantizationLayer : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::CpuDequantize that dequantizes an input tensor */
+class NEDequantizationLayer : public IFunction
{
public:
+ /** Default Constructor */
+ NEDequantizationLayer();
+ /** Default Destructor */
+ ~NEDequantizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDequantizationLayer(const NEDequantizationLayer &) = delete;
+ /** Default move constructor */
+ NEDequantizationLayer(NEDequantizationLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDequantizationLayer &operator=(const NEDequantizationLayer &) = delete;
+ /** Default move assignment operator */
+ NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default;
/** Configure the kernel.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:-----------|
+ * |QASYMM8 |F16, F32 |
+ * |QASYMM8_SIGNED |F16, F32 |
+ * |QSYMM8_PER_CHANNEL |F16, F32 |
+ * |QSYMM8 |F16, F32 |
+ * |QSYMM16 |F16, F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
@@ -52,6 +77,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H */
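[Editor's example] A minimal sketch dequantizing QASYMM8 to F32. The scale/offset pair is an assumed example, so each output element is out[i] = (in[i] - 128) * 0.02f.

#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void dequantize()
{
    Tensor q_src, f_dst;
    q_src.allocator()->init(
        TensorInfo(TensorShape(256U), 1, DataType::QASYMM8, QuantizationInfo(0.02f, 128)));
    f_dst.allocator()->init(TensorInfo(TensorShape(256U), 1, DataType::F32));

    NEDequantizationLayer dequant;
    dequant.configure(&q_src, &f_dst);

    q_src.allocator()->allocate();
    f_dst.allocator()->allocate();
    dequant.run();
}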
diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h
deleted file mode 100644
index b14e38a23a..0000000000
--- a/arm_compute/runtime/NEON/functions/NEDerivative.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDERIVATIVE_H
-#define ARM_COMPUTE_NEDERIVATIVE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEDerivativeKernel;
-class NEFillBorderKernel;
-
-/** Basic function to execute first order derivative operator. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEDerivativeKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEDerivative : public IFunction
-{
-public:
- /** Default constructor */
- NEDerivative();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDerivative(const NEDerivative &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDerivative &operator=(const NEDerivative &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEDerivative(NEDerivative &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEDerivative &operator=(NEDerivative &&) = delete;
- /** Default destructor */
- ~NEDerivative();
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data type supported: S16.
- * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data type supported: S16.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEDerivativeKernel> _kernel; /**< Derivative kernel */
- std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
-};
-}
-#endif /* ARM_COMPUTE_NEDERIVATIVE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
index d5c1f0ab6f..7a94833d10 100644
--- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,13 +24,12 @@
#ifndef ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H
#define ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CPP/functions/CPPDetectionPostProcessLayer.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/runtime/Tensor.h"
#include <map>
@@ -57,6 +56,16 @@ public:
~NEDetectionPostProcessLayer() = default;
/** Configure the detection output layer NE function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src2 |dst0 - dst3 |
+ * |:--------------|:--------------|
+ * |QASYMM8 |F32 |
+ * |QASYMM8_SIGNED |F32 |
+ * |F32 |F32 |
+ *
* @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32.
* @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding.
* @param[in] input_anchors The anchors input tensor. Data types supported: same as @p input_box_encoding.
@@ -68,8 +77,14 @@ public:
*
* @note Output contains all the detections. Of those, only the ones selected by the valid region are valid.
*/
- void configure(const ITensor *input_box_encoding, const ITensor *input_score, const ITensor *input_anchors,
- ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
+ void configure(const ITensor *input_box_encoding,
+ const ITensor *input_score,
+ const ITensor *input_anchors,
+ ITensor *output_boxes,
+ ITensor *output_classes,
+ ITensor *output_scores,
+ ITensor *num_detection,
+ DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDetectionPostProcessLayer
*
* @param[in] input_box_encoding The bounding box input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32.
@@ -83,8 +98,13 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input_box_encoding, const ITensorInfo *input_class_score, const ITensorInfo *input_anchors,
- ITensorInfo *output_boxes, ITensorInfo *output_classes, ITensorInfo *output_scores, ITensorInfo *num_detection,
+ static Status validate(const ITensorInfo *input_box_encoding,
+ const ITensorInfo *input_class_score,
+ const ITensorInfo *input_anchors,
+ ITensorInfo *output_boxes,
+ ITensorInfo *output_classes,
+ ITensorInfo *output_scores,
+ ITensorInfo *num_detection,
DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
// Inherited methods overridden:
void run() override;
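[Editor's example] A sketch of the seven-tensor call: the three inputs and four outputs map onto the src0-src2 and dst0-dst3 columns of the table above. Tensor setup is assumed to happen in the caller.

#include "arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h"

using namespace arm_compute;

void post_process(const ITensor *box_encoding, const ITensor *scores, const ITensor *anchors,
                  ITensor *out_boxes, ITensor *out_classes, ITensor *out_scores, ITensor *num_detection)
{
    NEDetectionPostProcessLayer dpp;
    dpp.configure(box_encoding, scores, anchors,
                  out_boxes, out_classes, out_scores, num_detection,
                  DetectionPostProcessLayerInfo()); // default NMS/threshold settings
    dpp.run();
}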
diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h
deleted file mode 100644
index 1f2bcb50ea..0000000000
--- a/arm_compute/runtime/NEON/functions/NEDilate.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDILATE_H
-#define ARM_COMPUTE_NEDILATE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute dilate. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEDilateKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEDilate : public INESimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output and border mode.
- *
- * @param[in, out] input First tensor input. Data type supported: U8.(Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEDILATE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 5b6ed55be2..3ae3b2a15c 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,27 +25,22 @@
#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
-class NEDirectConvolutionLayerOutputStageKernel;
-class NEDirectConvolutionLayerKernel;
-class NEFillBorderKernel;
-
+class ITensor;
+class ITensorInfo;
/** Function to run the direct convolution.
*
- * This function calls the following NEON kernels:
+ * This function calls the following:
*
- * -# @ref NEFillBorderKernel for the input
- * -# @ref NEDirectConvolutionLayerOutputStageKernel
- * -# @ref NEDirectConvolutionLayerKernel
+ * -# @ref cpu::CpuDirectConv2d
*/
class NEDirectConvolutionLayer : public IFunction
{
@@ -64,6 +59,16 @@ public:
~NEDirectConvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @note: DirectConvolution only works in the following configurations:
* 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
* 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
@@ -80,7 +85,12 @@ public:
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *bias,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer
*
* @note: DirectConvolution only works in the following configurations:
@@ -101,23 +111,20 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group;
- std::unique_ptr<NEDirectConvolutionLayerOutputStageKernel> _output_stage_kernel;
- std::unique_ptr<NEDirectConvolutionLayerKernel> _conv_kernel;
- std::unique_ptr<NEFillBorderKernel> _input_border_handler;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- bool _has_bias;
- bool _is_activationlayer_enabled;
- unsigned int _dim_split;
- bool _is_padding_required;
+ struct Impl;
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */
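[Editor's example] An illustrative 3x3, stride-1, F32 setup, one of the supported configurations noted above. Shapes are assumed: a 32x32x8 input with 16 filters and no padding gives a 30x30x16 output.

#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void direct_convolution()
{
    Tensor src, wgt, bia, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));
    wgt.allocator()->init(TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32));
    bia.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(30U, 30U, 16U), 1, DataType::F32));

    NEDirectConvolutionLayer direct_conv;
    direct_conv.configure(&src, &wgt, &bia, &dst, PadStrideInfo(1U, 1U, 0U, 0U));

    src.allocator()->allocate();
    wgt.allocator()->allocate();
    bia.allocator()->allocate();
    dst.allocator()->allocate();
    direct_conv.run();
}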
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index 5c755e96ac..ebf2277d1f 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INEOperator.h"
@@ -32,7 +33,7 @@ namespace arm_compute
{
class ITensor;
-/** Basic function to run @ref NEArithmeticOperationKernel for max
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max
*
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a max operation between two tensors.
@@ -54,13 +55,29 @@ public:
NEElementwiseMax &operator=(NEElementwiseMax &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -69,7 +86,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -79,7 +99,7 @@ private:
std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEArithmeticOperationKernel for min
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min
*
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a min operation between two tensors.
@@ -101,13 +121,29 @@ public:
NEElementwiseMin &operator=(NEElementwiseMin &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -116,7 +152,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -126,7 +165,7 @@ private:
std::unique_ptr<Impl> _impl;
};
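
The static validate() entry point can be exercised before any memory is allocated; a sketch (the F32 shapes are made up for illustration):

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"

    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        const TensorInfo src0(TensorShape(8U, 4U), 1, DataType::F32);
        const TensorInfo src1(TensorShape(8U, 4U), 1, DataType::F32);
        const TensorInfo dst(TensorShape(8U, 4U), 1, DataType::F32);

        // Only tensor metadata is needed; no buffers are touched.
        const Status st = NEElementwiseMin::validate(&src0, &src1, &dst);
        if(st.error_code() != ErrorCode::OK)
        {
            std::cerr << st.error_description() << "\n";
            return 1;
        }
        return 0;
    }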
-/** Basic function to run @ref NEArithmeticOperationKernel for squared difference
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference
*
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
 * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2)
@@ -148,13 +187,29 @@ public:
NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -163,7 +218,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -173,7 +231,7 @@ private:
std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEArithmeticOperationKernel for division
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division
*
* @note The tensor data type for the inputs must be F16/F32.
 * @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i])
@@ -195,13 +253,25 @@ public:
NEElementwiseDivision &operator=(NEElementwiseDivision &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -210,7 +280,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -220,7 +293,7 @@ private:
std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEArithmeticOperationKernel for power
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for power
*
* @note The tensor data type for the inputs must be F16/F32.
 * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
@@ -243,13 +316,25 @@ public:
NEElementwisePower &operator=(NEElementwisePower &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for power
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -258,7 +343,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -268,7 +356,7 @@ private:
std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEComparisonOperationKernel.
+/** Basic function to run @ref cpu::kernels::CpuComparisonKernel.
*
* @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a comparison operation between two tensors.
@@ -290,13 +378,27 @@ public:
NEElementwiseComparison &operator=(NEElementwiseComparison &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:-----|
+ * |QASYMM8 |QASYMM8 |U8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |U8 |
+ * |S32 |S32 |U8 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |U8 |
+ * |F16 |F16 |U8 |
+ * |F32 |F32 |U8 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: U8.
* @param[in] op Comparison Operation to be performed.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
* @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -305,7 +407,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+ static Status
+ validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
// Inherited methods overridden:
void run() override;
@@ -315,7 +418,7 @@ private:
std::unique_ptr<Impl> _impl;
};
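
A sketch of the dynamic comparison, where the operation is chosen at run time (shapes are illustrative; the U8 mask holds non-zero where the predicate is true):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor in1, in2, mask;
        in1.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        in2.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        mask.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::U8));

        NEElementwiseComparison cmp;
        cmp.configure(&in1, &in2, &mask, ComparisonOperation::Greater);
        in1.allocator()->allocate();
        in2.allocator()->allocate();
        mask.allocator()->allocate();
        cmp.run(); // mask[i] != 0 where in1[i] > in2[i]
        return 0;
    }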
-/** Basic function to run @ref NEComparisonOperationKernel
+/** Basic function to run @ref cpu::kernels::CpuComparisonKernel
*
* @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a comparison operation between two tensors.
@@ -343,7 +446,7 @@ public:
 * @param[out] output Output tensor. Data types supported: U8.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
* @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -374,206 +477,5 @@ using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>;
/** Basic function to run less-equal comparison. */
using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
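
These aliases fix the operation at compile time, so configure() drops the runtime op argument; a fragment assuming the same tensors as the previous sketch:

    NEGreater greater; // NEElementwiseComparisonStatic<ComparisonOperation::Greater>
    greater.configure(&in1, &in2, &mask); // no ComparisonOperation parameter
    greater.run();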
-namespace experimental
-{
-/** Basic function to run @ref NEArithmeticOperationKernel for max
- *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a max operation between two tensors.
- */
-class NEElementwiseMax : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max
- *
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-};
-
-/** Basic function to run @ref NEArithmeticOperationKernel for min
- *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a min operation between two tensors.
- */
-class NEElementwiseMin : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min
- *
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-};
-
-/** Basic function to run @ref NEArithmeticOperationKernel for squared difference
- *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2
- */
-class NEElementwiseSquaredDiff : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference
- *
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-};
-
-/** Basic function to run @ref NEArithmeticOperationKernel for division
- *
- * @note The tensor data type for the inputs must be S32/F16/F32.
- * @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i])
- */
-class NEElementwiseDivision : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in, out] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division
- *
- * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-};
-
-/** Basic function to run @ref NEArithmeticOperationKernel for power
- *
- * @note The tensor data type for the inputs must be F16/F32.
- * @note The function performs a elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
- * @note For an exponent that is a float, this function will only work with a positive base.
- */
-class NEElementwisePower : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in, out] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for power
- *
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-};
-
-/** Basic function to run @ref NEComparisonOperationKernel.
- *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a comparison operation between two tensors.
- */
-class NEElementwiseComparison : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: U16/U32.
- * @param[in] op Comparison Operation to be performed.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U16/U32.
- * @param[in] op Comparison Operation to be performed.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
-};
-
-/** Basic function to run @ref NEComparisonOperationKernel
- *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a comparison operation between two tensors.
- */
-template <ComparisonOperation op>
-class NEElementwiseComparisonStatic : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: U16/U32.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U16/U32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-};
-
-/** Basic function to run equal comparison. */
-using NEEqual = NEElementwiseComparisonStatic<ComparisonOperation::Equal>;
-/** Basic function to run not equal comparison. */
-using NENotEqual = NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>;
-/** Basic function to run greater comparison. */
-using NEGreater = NEElementwiseComparisonStatic<ComparisonOperation::Greater>;
-/** Basic function to run greater-equal comparison. */
-using NEGreaterEqual = NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>;
-/** Basic function to run less comparison. */
-using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>;
-/** Basic function to run less-equal comparison. */
-using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
-} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H */
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
index 46a7316705..63e47b8377 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,151 +25,72 @@
#define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-
-/** Basic function to perform inverse square root on an input tensor. */
-class NERsqrtLayer : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to perform exponential on an input tensor. */
-class NEExpLayer : public INESimpleFunctionNoBorder
+/** Basic function to perform unary elementwise operations */
+template <ElementWiseUnary op>
+class NEElementwiseUnaryLayer : public IFunction
{
public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEExpLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
+ /** Default Constructor */
+ NEElementwiseUnaryLayer();
+ /** Default Destructor */
+ ~NEElementwiseUnaryLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryLayer(const NEElementwiseUnaryLayer &) = delete;
+ /** Default move constructor */
+ NEElementwiseUnaryLayer(NEElementwiseUnaryLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryLayer &operator=(const NEElementwiseUnaryLayer &) = delete;
+ /** Default move assignment operator */
+ NEElementwiseUnaryLayer &operator=(NEElementwiseUnaryLayer &&);
-/** Basic function to negate an input tensor. */
-class NENegLayer : public INESimpleFunctionNoBorder
-{
-public:
/** Initialize the function
*
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NENegLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ * Valid data layouts:
+ * - All
*
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to compute the natural logarithm of an input tensor. */
-class NELogLayer : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |S32 |S32 |
*
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
+ * @param[in] input Input tensor. Data types supported: F16/F32 (F16/F32/S32 for NEG/ABS operations).
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
*/
void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NELogLayer
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] input First tensor input info. Data types supported: F16/F32.
+ * @param[in] input Input tensor info. Data types supported: F16/F32 (F16/F32/S32 for NEG/ABS operations).
* @param[in] output Output tensor info. Data types supported: Same as @p input.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
+ // Inherited methods overridden:
+ void run() override;
-/** Basic function to compute the absolute value of an input tensor. */
-class NEAbsLayer : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEAbsLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-/** Basic function to compute the round value elementwise of an input tensor. */
-class NERoundLayer : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NERoundLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
+using NERsqrtLayer = NEElementwiseUnaryLayer<ElementWiseUnary::RSQRT>;
+using NEExpLayer = NEElementwiseUnaryLayer<ElementWiseUnary::EXP>;
+using NENegLayer = NEElementwiseUnaryLayer<ElementWiseUnary::NEG>;
+using NELogLayer = NEElementwiseUnaryLayer<ElementWiseUnary::LOG>;
+using NEAbsLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ABS>;
+using NERoundLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ROUND>;
+using NESinLayer = NEElementwiseUnaryLayer<ElementWiseUnary::SIN>;
-/** Basic function to compute the sine of an input tensor. */
-class NESinLayer : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NESinLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H */
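
Call sites are unchanged by the move to the template thanks to the aliases; a sketch with NERsqrtLayer (shape and type are illustrative):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        const TensorInfo info(TensorShape(64U), 1, DataType::F32);
        src.allocator()->init(info);
        dst.allocator()->init(info);

        NERsqrtLayer rsqrt; // NEElementwiseUnaryLayer<ElementWiseUnary::RSQRT>
        rsqrt.configure(&src, &dst);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        rsqrt.run(); // dst[i] = 1 / sqrt(src[i])
        return 0;
    }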
diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h
deleted file mode 100644
index e81b4ce33a..0000000000
--- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H
-#define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H
-
-#include "arm_compute/runtime/Distribution1D.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/Lut.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-class NEHistogramKernel;
-class NECumulativeDistributionKernel;
-class NETableLookupKernel;
-using IImage = ITensor;
-
-/** Basic function to execute histogram equalization. This function calls the following NEON kernels:
- *
- * -# @ref NEHistogramKernel
- * -# @ref NECumulativeDistributionKernel
- * -# @ref NETableLookupKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEEqualizeHistogram : public IFunction
-{
-public:
- /** Default Constructor. */
- NEEqualizeHistogram();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEqualizeHistogram(const NEEqualizeHistogram &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEqualizeHistogram &operator=(const NEEqualizeHistogram &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEEqualizeHistogram(NEEqualizeHistogram &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEEqualizeHistogram &operator=(NEEqualizeHistogram &&) = delete;
- /** Default destructor */
- ~NEEqualizeHistogram();
- /** Initialise the kernel's inputs.
- *
- * @note Currently the width of the input image must be a multiple of 16.
- *
- * @param[in] input Input image. Data type supported: U8.
- * @param[out] output Output image. Data type supported: same as @p input
- */
- void configure(const IImage *input, IImage *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */
- std::unique_ptr<NECumulativeDistributionKernel> _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution
- and creates the relevant LookupTable. */
- std::unique_ptr<NETableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
- Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */
- Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */
- Lut _cd_lut; /**< Holds the equalization lookuptable. */
- static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */
- static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */
-};
-}
-#endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */
diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h
deleted file mode 100644
index b81da4e5b6..0000000000
--- a/arm_compute/runtime/NEON/functions/NEErode.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEERODE_H
-#define ARM_COMPUTE_NEERODE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute erode. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEErodeKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEErode : public INESimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output and border mode
- *
- * @param[in, out] input First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEERODE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h
index 4b6cc3fd18..99c6fd4eb4 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT1D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFT1D_H
#define ARM_COMPUTE_NEFFT1D_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
@@ -40,7 +39,7 @@ class NEFFTDigitReverseKernel;
class NEFFTRadixStageKernel;
class NEFFTScaleKernel;
-/** Basic function to execute one dimensional FFT. This function calls the following NEON kernels:
+/** Basic function to execute one dimensional FFT. This function calls the following kernels:
*
* -# @ref NEFFTDigitReverseKernel Performs digit reverse
* -# @ref NEFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition
@@ -63,6 +62,14 @@ public:
~NEFFT1D();
/** Initialise the function's source and destinations.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
 * Number of channels supported: 1 (real tensor) or 2 (complex tensor). If @p input is real, @p output must be complex.
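
A configuration sketch for a forward real-to-complex 1D transform (the length 128 is an illustrative power of two; FFT1DInfo comes from FunctionDescriptors.h):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/FunctionDescriptors.h"
    #include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Real F32 input (1 channel); complex output (2 channels: real, imaginary).
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(128U), 2, DataType::F32));

        FFT1DInfo fft_info;
        fft_info.axis      = 0; // transform along the first dimension
        fft_info.direction = FFTDirection::Forward;

        NEFFT1D fft;
        fft.configure(&src, &dst, fft_info);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        fft.run();
        return 0;
    }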
diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h
index 18e72c1a2f..cefd3df17a 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT2D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFT2D_H
#define ARM_COMPUTE_NEFFT2D_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
#include "arm_compute/runtime/Tensor.h"
@@ -36,7 +35,7 @@ namespace arm_compute
// Forward declaration
class ITensor;
-/** Basic function to execute two dimensional FFT. This function calls the following NEON kernels:
+/** Basic function to execute two dimensional FFT. This function calls the following kernels:
*
* -# @ref NEFFT1D 1D FFT is performed on the first given axis
* -# @ref NEFFT1D 1D FFT is performed on the second given axis
@@ -58,6 +57,14 @@ public:
~NEFFT2D();
/** Initialise the function's source and destinations
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
index 37750e243b..84bfe6b02f 100644
--- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEFFT2D.h"
@@ -43,7 +42,7 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Basic function to execute FFT-based convolution on NEON. This function calls the following NEON functions/kernels:
+/** Basic function to execute FFT-based convolution on CPU. This function calls the following functions/kernels:
*
 * -# @ref NEPermute Permute input if NHWC (only NCHW is supported).
* -# @ref NEPadLayer Pad input.
@@ -73,38 +72,58 @@ public:
~NEFFTConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
 * @note: This function works only with square kernel sizes and unit strides, for both NCHW and NHWC data layouts
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represents a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend.
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEFFTConvolutionLayer
*
 * @note: This function works only with square kernel sizes and unit strides, for both NCHW and NHWC data layouts
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represents a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
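
A sketch of the extended configure() call (the shapes assume NCHW, a 9x9 kernel and SAME padding; all values are illustrative):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 3U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(9U, 9U, 3U, 8U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 8U), 1, DataType::F32));

        NEFFTConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst,
                       PadStrideInfo(1, 1, 4, 4),       // unit strides; pad 4 keeps 64x64 with a 9x9 kernel
                       ActivationLayerInfo(),           // no fused activation
                       /* enable_fast_math = */ false); // accepted but unused on the CPU backend

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();
        conv.run();
        return 0;
    }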
diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h
deleted file mode 100644
index e86a87eb7e..0000000000
--- a/arm_compute/runtime/NEON/functions/NEFastCorners.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFASTCORNERS_H
-#define ARM_COMPUTE_NEFASTCORNERS_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NENonMaximaSuppression3x3Kernel;
-class NEFastCornersKernel;
-class NEFillBorderKernel;
-class NEFillArrayKernel;
-using IImage = ITensor;
-
-/** Basic function to execute fast corners. This function call the following NEON kernels:
- *
- * -# @ref NEFastCornersKernel
- * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true)
- * -# @ref NEFillArrayKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEFastCorners : public IFunction
-{
-public:
- /** Constructor */
- NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFastCorners(const NEFastCorners &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFastCorners &operator=(const NEFastCorners &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEFastCorners(NEFastCorners &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEFastCorners &operator=(NEFastCorners &&) = delete;
- /** Default destructor */
- ~NEFastCorners();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::unique_ptr<NEFastCornersKernel> _fast_corners_kernel;
- std::unique_ptr<NEFillBorderKernel> _border_handler;
- std::unique_ptr<NENonMaximaSuppression3x3Kernel> _nonmax_kernel;
- std::unique_ptr<NEFillArrayKernel> _fill_kernel;
- Image _output;
- Image _suppressed;
- bool _non_max;
-};
-}
-#endif /*ARM_COMPUTE_NEFASTCORNERS_H */
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index 14d690f419..1829c71fef 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,22 +26,51 @@
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
-/** Basic function to run @ref NEMemsetKernel */
-class NEFill : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuFillKernel */
+class NEFill : public IFunction
{
public:
+ /** Default Constructor */
+ NEFill();
+ /** Default Destructor */
+ ~NEFill();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFill(const NEFill &) = delete;
+ /** Default move constructor */
+ NEFill(NEFill &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFill &operator=(const NEFill &) = delete;
+ /** Default move assignment operator */
+ NEFill &operator=(NEFill &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in,out] tensor Source tensor. Data types supported: All
* @param[in] constant_value Constant value to use to fill tensor.
*/
void configure(ITensor *tensor, PixelValue constant_value);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_FILL_H */
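
A fill sketch against the new kernel path (the F32 shape and the constant are illustrative):

    #include "arm_compute/core/PixelValue.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEFill.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor t;
        t.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));

        NEFill fill;
        fill.configure(&t, PixelValue(1.f)); // every element becomes 1.0f
        t.allocator()->allocate();
        fill.run();
        return 0;
    }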
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
index e9a08ef7ec..44b1d4a62b 100644
--- a/arm_compute/runtime/NEON/functions/NEFillBorder.h
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
@@ -39,8 +40,17 @@ class NEFillBorderKernel;
class NEFillBorder : public IFunction
{
public:
+ NEFillBorder();
/** Initialize the function's source, destination and border_mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note This function fills the borders within the XY-planes.
*
* @param[in, out] input Source tensor. Data type supported: All
@@ -48,7 +58,10 @@ public:
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ void configure(ITensor *input,
+ unsigned int border_width,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value = PixelValue());
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index 9f0d5226de..3e92143824 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,9 @@
#define ARM_COMPUTE_NEFLATTENLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
@@ -33,11 +35,30 @@ class ITensor;
class ITensorInfo;
/** Basic function to execute flatten layer kernel. */
-class NEFlattenLayer : public INESimpleFunctionNoBorder
+class NEFlattenLayer : public IFunction
{
public:
+ NEFlattenLayer();
+ /** Destructor */
+ ~NEFlattenLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFlattenLayer(const NEFlattenLayer &) = delete;
+ /** Default move constructor */
+ NEFlattenLayer(NEFlattenLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFlattenLayer &operator=(const NEFlattenLayer &) = delete;
+ /** Default move assignment operator */
+ NEFlattenLayer &operator=(NEFlattenLayer &&);
/** Initialise the kernel's input and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All
* @param[out] output Output tensor with shape [w*h*d, input_batches] where:
 * w = input tensor width, h = input tensor height and d = input tensor depth. Data type supported: same as @p input
@@ -54,6 +75,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
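
A sketch of the flatten shape contract (a [4, 4, 3] volume with a batch of 2 becomes [48, 2]; the values are illustrative):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U, 2U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(48U, 2U), 1, DataType::F32)); // 4 * 4 * 3 = 48

        NEFlattenLayer flatten;
        flatten.configure(&src, &dst);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        flatten.run();
        return 0;
    }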
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index 7f4248eadb..77ac484bab 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,21 +24,44 @@
#ifndef ARM_COMPUTE_NEFLOOR_H
#define ARM_COMPUTE_NEFLOOR_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
class ITensorInfo;
-/** Basic function to run @ref NEFloorKernel */
-class NEFloor : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuFloorKernel */
+class NEFloor : public IFunction
{
public:
+ /** Constructor */
+ NEFloor();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFloor(const NEFloor &) = delete;
+ /** Default move constructor */
+ NEFloor(NEFloor &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFloor &operator=(const NEFloor &) = delete;
+ /** Default move assignment operator */
+ NEFloor &operator=(NEFloor &&);
+ /** Destructor */
+ ~NEFloor();
 /** Set the source and destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data type supported: F16/F32.
* @param[out] output Destination tensor. Same as @p input
*/
@@ -51,6 +74,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEFLOOR_H */
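Because the function exposes a static validate(), support can be checked before configuring; a sketch (shapes and names are illustrative):

    TensorInfo src_info(TensorShape(16U, 16U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(16U, 16U), 1, DataType::F32);

    // validate() returns a Status convertible to bool; configure() asserts on invalid setups.
    if (bool(NEFloor::validate(&src_info, &dst_info)))
    {
        Tensor src, dst;
        src.allocator()->init(src_info);
        dst.allocator()->init(dst_info);

        NEFloor floor_fn;
        floor_fn.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        floor_fn.run();
    }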
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 0a7748a94b..885f8430cf 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,57 +24,20 @@
#ifndef ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
#define ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
-#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
+#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/Tensor.h"
-namespace arm_compute
-{
-class NEFlattenLayerKernel;
+#include <memory>
-/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels:
- *
- * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
- */
-class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder
+namespace arm_compute
{
-public:
- /** Constructor */
- NEFullyConnectedLayerReshapeWeights() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFullyConnectedLayerReshapeWeights(const NEFullyConnectedLayerReshapeWeights &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFullyConnectedLayerReshapeWeights &operator=(const NEFullyConnectedLayerReshapeWeights &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEFullyConnectedLayerReshapeWeights(NEFullyConnectedLayerReshapeWeights &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEFullyConnectedLayerReshapeWeights &operator=(NEFullyConnectedLayerReshapeWeights &&) = delete;
- /** Default destructor */
- ~NEFullyConnectedLayerReshapeWeights() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights
- *
- * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
namespace weights_transformations
{
-/** Basic function to manage the reshape weights generated from @ref NEFullyConnectedLayerReshapeWeights */
+/** Basic function to manage the reshape weights generated from @ref NETranspose */
class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights
{
public:
@@ -106,17 +69,17 @@ public:
}
private:
- static constexpr uint32_t _uid = 0x0;
- Tensor _output{};
- NEFullyConnectedLayerReshapeWeights _func{};
+ static constexpr uint32_t _uid = 0x0;
+ Tensor _output{};
+ NETranspose _func{};
};
} // namespace weights_transformations
-/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
- * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
- * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
- * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
+/** Basic function to compute a Fully Connected layer. This function calls the following kernels:
+ * -# @ref cpu::kernels::CpuIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref NETranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
+ * -# @ref NEGEMM or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel or @ref NEGEMMLowpOutputStage (if quantized asymmetric) (if @p biases is not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
@@ -124,7 +87,8 @@ class NEFullyConnectedLayer : public IFunction
{
public:
/** Constructor */
- NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete;
/** Prevent instances of this class from being moved (As this class contains pointers) */
@@ -137,66 +101,77 @@ public:
~NEFullyConnectedLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional.
- * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
- * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
- * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
- * @param[in] fc_info (Optional) Fully connected layer additional info
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+     * @param[in]  weights_info (Optional) Stores necessary compute information when weights are already reshaped
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
- FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
+ const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
- * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
- * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
- * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
- * @param[in] fc_info (Optional) Fully connected layer additional info
+ * Similar to @ref NEFullyConnectedLayer::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
+ const WeightsInfo &weights_info = WeightsInfo());
+
+    /** Static function that queries whether a fixed-format kernel exists for a given problem description
+     *
+     * @param[out] expected_weight_format Format in which the weights should be stored for the fixed-format kernel found
+ * @param[in] input Source tensor
+ * @param[in] weights Weights tensor.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor
+ * @param[in] fc_info Fully connected layer additional info
+ * @param[in] weights_info Describes weights shape
+ *
+ * @return a status
+ */
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const FullyConnectedLayerInfo &fc_info,
+ const WeightsInfo &weights_info);
//Inherited methods override
void run() override;
void prepare() override;
private:
- void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
- void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
- void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
-
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- std::unique_ptr<NEFlattenLayerKernel> _flatten_kernel;
- NEConvertFullyConnectedWeights _convert_weights;
- weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed;
- NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
- weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- Tensor _flatten_output;
- Tensor _converted_weights_output;
- Tensor _reshape_weights_output;
- const ITensor *_original_weights;
- bool _are_weights_converted;
- bool _are_weights_reshaped;
- bool _is_fc_after_conv;
- bool _is_quantized_asymmetric;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H */
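A sketch of the extended configure() overload with the optional WeightsInfo argument (all shapes and comments are illustrative assumptions):

    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 1U), 1, DataType::F32));      // [ifm, batch]
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32)); // 2D weights (orientation depends on fc_info.transpose_weights)
    bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 1U), 1, DataType::F32));       // [ofm, batch]

    NEFullyConnectedLayer fc; // a memory manager / weights manager may be passed to the constructor
    FullyConnectedLayerInfo fc_info;
    fc.configure(&src, &weights, &bias, &dst, fc_info); // weights_info defaults to WeightsInfo()

    for (Tensor *t : {&src, &weights, &bias, &dst})
        t->allocator()->allocate();
    fc.run(); // weight reshaping is prepared internally on the first run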
diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
index 5dc804e240..f53b3de7f6 100644
--- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
+++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,6 +52,16 @@ public:
~NEFuseBatchNormalization();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
* @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
* @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
@@ -65,9 +75,16 @@ public:
* @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
* @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to Convolution.
*/
- void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
- const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ void configure(const ITensor *input_weights,
+ const ITensor *bn_mean,
+ const ITensor *bn_var,
+ ITensor *fused_weights,
+ ITensor *fused_bias,
+ const ITensor *input_bias = nullptr,
+ const ITensor *bn_beta = nullptr,
+ const ITensor *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
/** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalization
*
* @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
@@ -85,10 +102,16 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ static Status validate(const ITensorInfo *input_weights,
+ const ITensorInfo *bn_mean,
+ const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights,
+ const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias = nullptr,
+ const ITensorInfo *bn_beta = nullptr,
+ const ITensorInfo *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
// Inherited methods overridden:
void run() override;
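A sketch of the fusion call documented above (conv_weights, bn_mean, bn_var, bn_beta, bn_gamma, fused_weights and fused_bias are assumed to be pre-initialised F32 tensors):

    NEFuseBatchNormalization fuse_bn;
    // Fold the BN statistics (and optional beta/gamma) into new convolution weights and bias.
    fuse_bn.configure(&conv_weights, &bn_mean, &bn_var,
                      &fused_weights, &fused_bias,
                      /*input_bias=*/nullptr, &bn_beta, &bn_gamma,
                      /*epsilon=*/0.001f, FuseBatchNormalizationType::CONVOLUTION);
    fuse_bn.run();
    // fused_weights / fused_bias can now replace the original convolution parameters.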
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 645ab56417..29650a5eca 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,38 +24,18 @@
#ifndef ARM_COMPUTE_NEGEMM_H
#define ARM_COMPUTE_NEGEMM_H
+#include "arm_compute/function_info/GEMMInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
-class NEGEMMInterleave4x4Kernel;
-class NEGEMMMatrixAdditionKernel;
-class NEGEMMMatrixMultiplyKernel;
-class NEGEMMTranspose1xWKernel;
-/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels:
+/** Basic function to execute GEMM. This function calls the following kernels:
*
- * If optimized assembly is available:
- * -# @ref NEGEMMAssemblyDispatch
- * -# @ref NEActivationLayer (if alpha != 1.0)
- * Else:
- * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix)
- * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix)
- * -# @ref NEGEMMMatrixMultiplyKernel
- * In both cases:
- * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once)
- * Else:
- * -# @ref NEArithmeticAdditionKernel (if c != nullptr and is reshaped once and not optimized assembly in place)
- *
- * -# @ref NEActivationLayer (if activation is specified in GEMMInfo)
+ * -# @ref cpu::CpuGemm
*/
class NEGEMM : public IFunction
{
@@ -74,9 +54,21 @@ public:
~NEGEMM();
 /** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------------|:-----------|:---------|:--------------|
+ * |F32 |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |F16 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ *
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
* @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
+ * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around
+ *
* @param[in] a First input tensor (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
@@ -86,49 +78,49 @@ public:
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should happen only for the first run
*/
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(const ITensor *a,
+ const ITensor *b,
+ const ITensor *c,
+ ITensor *d,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMM.
*
- * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
- * @param[out] output Output tensor info. Data type supported: same as @p a
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of matrix C
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run
+ * Similar to @ref NEGEMM::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
+
+    /** Static function that queries whether a fixed-format kernel exists; if one does, it returns in the first argument the format in which
+     * the weights are expected to be reshaped, as defined by the WeightFormat class. Apart from the first argument, the remaining arguments
+     * are the same as in @ref NEGEMM::validate(), except that all arguments are required.
*
* @return a status
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel;
- std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel;
- std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel;
- NEActivationLayer _alpha_scale_func;
- NEArithmeticAddition _add_bias;
- NEActivationLayer _activation_func;
-
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _tmp_d;
- const ITensor *_original_b;
- bool _run_vector_matrix_multiplication;
- bool _run_alpha_scale;
- bool _run_addition;
- bool _run_bias_addition;
- bool _run_activation;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMM_H */
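A minimal sketch of the signature above, computing D = alpha * A * B + beta * C on F32 matrices (M=16, K=32, N=8 are illustrative; TensorShape lists the x/width dimension first):

    Tensor a, b, c, d;
    a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32)); // A: K x M
    b.allocator()->init(TensorInfo(TensorShape(8U, 32U), 1, DataType::F32));  // B: N x K
    c.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::F32));  // C: N x M
    d.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::F32));  // D: N x M

    NEGEMM gemm;
    gemm.configure(&a, &b, &c, &d, /*alpha=*/1.f, /*beta=*/1.f);

    for (Tensor *t : {&a, &b, &c, &d})
        t->allocator()->allocate();
    gemm.run(); // any one-off reshaping is performed internally on the first run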
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
deleted file mode 100644
index 8f9498d0f5..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H
-
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-namespace arm_compute
-{
-/* Convolution method supported by the assembly gemm interface */
-enum class AsmConvMethod
-{
- Im2Col,
- Indirect,
- Conv
-};
-
-struct AsmGemmInfo
-{
- AsmConvMethod method{ AsmConvMethod::Im2Col };
- PadStrideInfo ps_info{};
- ActivationLayerInfo activation_info{};
- GEMMLowpOutputStageInfo output_stage{};
- bool negated_offsets{ true };
- bool reinterpret_input_as_3d{ false };
- bool depth_output_gemm3d{ false };
- int64_t padding_top{ 0 };
- int64_t padding_left{ 0 };
- float padding_value{ 0.f };
-};
-
-/** Assembly kernel glue */
-class NEGEMMAssemblyDispatch : public IFunction
-{
-public:
- /** Constructor */
- NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
- /** Prevent instances of this class from being copy constructed */
- NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete;
- /** Prevent instances of this class from being copied */
- NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete;
- NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default;
- NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default;
- ~NEGEMMAssemblyDispatch() = default;
-
- class IFallback
- {
- public:
- virtual void run() = 0;
- virtual void prepare() = 0;
- virtual bool is_configured() const = 0;
- virtual ~IFallback() = default;
- };
-
-public:
- /** If supported create a Compute Library function else fallback to the arm_gemm function.
- *
- * @param[in] a Input tensor (Matrix A)
- * @param[in] b Input tensor (Matrix B)
- * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations
- * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] info GEMM meta-data
- */
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const AsmGemmInfo &info);
-
- /** Indicates whether or not this function can be used to process the given parameters.
- *
- * @param[in] a Input tensor info (Matrix A)
- * @param[in] b Input tensor info (Matrix B)
- * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations
- * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] info GEMM meta-data
- *
- * @return a status.
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info);
- /** Checks if activation is supported by the gemm assembly dispatcher
- *
- * @param[in] activation Activation to check
- *
- * @return True if activation is supported else false
- */
- static bool is_activation_supported(const ActivationLayerInfo &activation);
- /** Was the function successfully configured ?
- *
- * @return True if the function is configured and ready to run
- */
- bool is_configured() const;
-
- // Inherited methods overridden:
- void prepare() override;
- void run() override;
-
-private:
- std::unique_ptr<IFallback> _arm_gemm; /** Interface for the arm_gemm fallback */
- MemoryGroup _memory_group; /**< Function memory group */
- IWeightsManager *_weights_manager; /**< Pointer to the weights manager */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index 7cae39397f..d1c5a1c9b3 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,21 +27,20 @@
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
+
namespace arm_compute
{
// Forward declarations
class ITensor;
-/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
+class ITensorInfo;
+
+/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
*
* Supports only NHWC data layout
*
- * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref cpu::CpuGemmAssemblyDispatch
* -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch
*
* Weights are transformed from OHWI to HWIO format using the following kernels:
@@ -60,8 +59,22 @@ public:
NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete;
/** Default move assignment operator */
NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default;
+ /** Destructor */
+ ~NEGEMMConv2d();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
@@ -73,7 +86,8 @@ public:
* Data types supported: Same as @p input.
* @param[in] info Convolution layer descriptor
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info);
+ void
+ configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConv2d
*
* @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
@@ -89,20 +103,19 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const Conv2dInfo &info);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- NEGEMMAssemblyDispatch _gemm_asm_func;
- NEActivationLayer _activation_func;
- NEPermute _weights_permute_func;
- const ITensor *_original_weights;
- Tensor _permuted_weights;
- bool _is_prepared;
- bool _run_activation;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEGEMMCONV2D_H */
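A sketch of the configure() call with a Conv2dInfo descriptor (the field order of Conv2dInfo follows FunctionDescriptors.h and is an assumption here; src, weights, bias and dst are assumed pre-initialised NHWC tensors):

    // 3x3 convolution, stride 1, padding 1, with a fused ReLU.
    Conv2dInfo info(PadStrideInfo(1, 1, 1, 1),
                    Size2D(1U, 1U),
                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                    /*enable_fast_math=*/false,
                    /*num_groups=*/1);

    NEGEMMConv2d conv;
    conv.configure(&src, &weights, &bias, &dst, info);
    conv.run(); // runs the assembly GEMM, plus NEActivationLayer if the activation could not be fused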
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 59d83ed68d..3e84c3e2cf 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,147 +24,31 @@
#ifndef ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
class ITensor;
-class NECol2ImKernel;
-class NEIm2ColKernel;
-class NEWeightsReshapeKernel;
-
-/** Function to reshape the weights. This function calls the following kernel:
- * -# @ref NEWeightsReshapeKernel
- */
-class NEConvolutionLayerReshapeWeights : public IFunction
-{
-public:
- /** Constructor */
- NEConvolutionLayerReshapeWeights();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = delete;
- /** Default destructor */
- ~NEConvolutionLayerReshapeWeights();
- /** Set the input and output tensors.
- *
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: All.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: same as @p weights.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output Destination tensor. Data types supported: same as @p weights.
- */
- void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
- *
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: All.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: same as @p weights.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[in] output Destination tensor. Data types supported: same as @p weights.
- *
- * @return an error status
- */
- static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel;
-};
-
-namespace weights_transformations
-{
-/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */
-class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
-{
-public:
- /** Constructor */
- NEConvolutionLayerReshapeWeightsTransform() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
- /** Default destructor */
- ~NEConvolutionLayerReshapeWeightsTransform() = default;
- void configure(const ITensor *input, const ITensor *biases)
- {
- _bias_bit = (biases != nullptr) ? 1 : 0;
- _func.configure(input, biases, &_output);
- }
-
- void run() override
- {
- _output.allocator()->allocate();
- _func.run();
- _reshape_run = true;
- }
-
- ITensor *get_weights() override
- {
- return &_output;
- }
-
- void release() override
- {
- _output.allocator()->free();
- }
+class ITensorInfo;
- uint32_t uid() override
- {
- return ((0x8) | (_bias_bit << 7));
- }
-
- bool is_reshape_run()
- {
- return _reshape_run;
- }
-
-private:
- Tensor _output{};
- NEConvolutionLayerReshapeWeights _func{};
- int32_t _bias_bit{ 0 };
-};
-} // namespace weights_transformations
-
-/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
+/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
*
- * -# @ref NEIm2ColKernel
- * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
- * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
- * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
- * -# @ref NECol2ImKernel (if NCHW data layout)
+ * -# @ref cpu::CpuGemmConv2d
*
*/
class NEGEMMConvolutionLayer : public IFunction
{
public:
/** Constructor */
- NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
@@ -177,116 +61,154 @@ public:
~NEGEMMConvolutionLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+     *                              available, which may introduce a drop in accuracy. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
- * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+     *                              available, which may introduce a drop in accuracy. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- /** Configures the appropriate matrix multiply routine
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
+
+ /** Static function to check if there is an optimized version of
+ * GEMM available for the input parameters.
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
- */
- void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
+     * The method is intended to find the optimal memory layout
+     * for the weights tensor when running variable weights
+     * execution.
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Output tensor info. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
- * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
+     * The user can query the database of optimised kernels in
+     * arm_gemm by specifying one of the enumerations of
+     * arm_compute::WeightFormat in the weight_format field of the input
+     * parameter weights_info. On success, the method writes the
+     * expected format in the output parameter
+     * expected_weight_format. The expected_weight_format can then be
+     * used in the configure method of the class to retrieve the
+     * optimal kernel.
*
- * @return a status
- */
- static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- int gemm_3d_depth = 1, bool skip_im2col = false);
- /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore
+ * Use case one - query for a specific format:
*
- * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth Depth of GEMM 3D
- * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout
+     * WeightsInfo weights_info(..., arm_compute::WeightFormat::OHWIo4, ...); // Set the value of the input query.
+     * if (NEGEMMConvolutionLayer::has_opt_impl(WeightFormat(), ...., weights_info, ...))
+     * {
+     *   auto conv = std::make_unique<NEGEMMConvolutionLayer>();
+     *   conv->configure(..., weights_info, ...); // uses the same WeightFormat the user wanted originally, OHWIo4.
+ * conv->run(...);
+ * }
*
- * @return a status
+ * Use case two - query for any format that would be optimal for the GEMM to execute:
+ *
+     * WeightsInfo weights_info(..., arm_compute::WeightFormat::ANY, ...); // Set the value of the input query.
+     * arm_compute::WeightFormat expected_wf;
+     * if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ...., weights_info, ...))
+     * {
+     *   auto conv = std::make_unique<NEGEMMConvolutionLayer>();
+     *   // ... code to convert the layout of the weights tensor to the layout returned by has_opt_impl
+     *   WeightsInfo new_weights_info(..., expected_wf, ...); // Set the value of the WeightFormat returned by has_opt_impl.
+ * conv->configure(..., new_weights_info, ...);
+ * conv->run(...);
+ * }
+ *
+     * Notice that a GEMM configured with a WeightFormat other than
+     * UNSPECIFIED will run in variable weights mode.
+ *
+ * @param[out] expected_weight_format The arm_compute::WeightFormat expected by the kernel.
+ * @param[in] src Source tensor info.
+ * @param[in] weights Weights tensor info.
+ * @param[in] biases Biases tensor info. Shared biases supported.
+ * @param[in] dst Destination tensor info.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info           (Optional) Specifies additional configuration parameters for the weights of the GEMM computation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info               (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported; no activation (i.e. Linear) is the default.
+     * @param[in]  enable_fast_math       (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation available, which may introduce a drop in accuracy. Default is false
+ *
+ * @return a Status
*/
- static Status validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *dst,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEConvolutionLayerReshapeWeights _reshape_weights;
- weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
- std::unique_ptr<NEIm2ColKernel> _im2col_kernel;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- std::unique_ptr<NECol2ImKernel> _col2im_kernel;
- NEReshapeLayer _reshape_layer;
-
- const ITensor *_original_weights;
-
- Tensor _im2col_output;
- Tensor _weights_reshaped;
- Tensor _gemm_output;
- Tensor _tmp_output;
-
- DataLayout _data_layout;
-
- bool _skip_im2col;
- bool _skip_col2im;
- bool _is_quantized;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H */
+#endif /* ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H */
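The second use case above, written out as a compilable sketch (the WeightsInfo constructor arguments and every tensor/info name are assumptions for illustration):

    arm_compute::WeightFormat expected_wf = arm_compute::WeightFormat::ANY;
    WeightsInfo query(false, 3U, 3U, 64U, false, arm_compute::WeightFormat::ANY);

    Status status = NEGEMMConvolutionLayer::has_opt_impl(expected_wf, &src_info, &wei_info,
                                                         &bias_info, &dst_info, conv_info, query);
    if (bool(status))
    {
        // Re-layout the weights tensor to expected_wf, then configure with that format.
        WeightsInfo chosen(false, 3U, 3U, 64U, false, expected_wf);
        NEGEMMConvolutionLayer conv;
        conv.configure(&src, &weights, &bias, &dst, conv_info, chosen);
        conv.run();
    }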
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
deleted file mode 100644
index 7195c71063..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H
-#define ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute NEGEMMInterleave4x4Kernel. This function calls the following NEON kernel:
- *
- * -# @ref NEGEMMInterleave4x4Kernel
- *
- */
-class NEGEMMInterleave4x4 : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input First input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index cb1d6bd782..6d07675d3d 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,49 +21,34 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "NEActivationLayer.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/GEMMInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include <memory>
namespace arm_compute
{
class ITensor;
-class NEConvertQuantizedSignednessKernel;
-class NEConvertQuantizedSignednessKernel;
-class NEGEMMInterleave4x4Kernel;
-class NEGEMMLowpMatrixMultiplyKernel;
-class NEGEMMLowpOffsetContributionKernel;
-class NEGEMMLowpOffsetContributionOutputStageKernel;
-class NEGEMMLowpMatrixAReductionKernel;
-class NEGEMMLowpMatrixBReductionKernel;
-class NEGEMMTranspose1xWKernel;
+class ITensorInfo;
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
+/** Function to run Gemm on quantized types.
*
- * -# @ref NEGEMMInterleave4x4Kernel
- * -# @ref NEGEMMTranspose1xWKernel
- * -# @ref NEGEMMLowpMatrixMultiplyKernel
- * -# @ref NEGEMMLowpOffsetContributionKernel
- * -# @ref NEActivationLayer
+ * This function calls the following:
*
- * otherwise if the DOT product instruction is available:
- *
- * -# @ref NEGEMMLowpOffsetContributionKernel
- *
-*/
+ * -# @ref cpu::CpuGemmLowpMatrixMultiplyCore
+ */
class NEGEMMLowpMatrixMultiplyCore : public IFunction
{
public:
/** Constructor */
- NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move constructor */
@@ -76,6 +61,27 @@ public:
~NEGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QASYMM8 |S32 |S32 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8 |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
+ *
* @note GEMM_LOWP: low precision GEMM kernel
* This kernel performs the following computations:
*
@@ -83,69 +89,36 @@ public:
* -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
* -# Compute the matrix product of the resulting a * b in int32.
*
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
+ * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED/F32 otherwise
*
* @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
* @param[in] b Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[out] output Output tensor. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
+ * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32/F32
+ * @param[out] output Output tensor. Data type supported: S32/QASYMM8/QASYMM8_SIGNED/F32
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should be executed only for the first run
*/
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(
+ const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
*
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[in] output Output tensor info. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should be executed only for the first run
+ * Similar to @ref NEGEMMLowpMatrixMultiplyCore::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ const GEMMInfo &gemm_info = GEMMInfo());
// Inherited methods overridden
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
- std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
- std::unique_ptr<NEGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
- std::unique_ptr<NEGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
- std::unique_ptr<NEGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
- NEActivationLayer _activation_func;
- std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_to_signed_asymm;
- std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_from_signed_asymm;
-
- Tensor _vector_sum_col;
- Tensor _vector_sum_row;
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _mm_result_s32;
- Tensor _signed_a;
- Tensor _signed_output;
- const ITensor *_original_b;
- int32_t _a_offset;
- int32_t _b_offset;
-
- bool _run_vector_matrix_multiplication;
- bool _assembly_path;
- bool _fused_assembly_path;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _fuse_output_stage;
- bool _run_activation;
- bool _flip_signedness;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
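To make the quantized GEMM flow above concrete, the following is a minimal sketch of configuring and running NEGEMMLowpMatrixMultiplyCore with QASYMM8 inputs and an S32 accumulator output. Shapes, scales and offsets are made-up example values; recall that TensorShape is (width, height), so an M x K matrix is TensorShape(K, M).

#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Hedged sketch: (16 x 32) QASYMM8 * (32 x 8) QASYMM8 -> (16 x 8) S32.
    Tensor a, b, dst;
    a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    b.allocator()->init(TensorInfo(TensorShape(8U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 3)));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::S32));

    NEGEMMLowpMatrixMultiplyCore gemmlowp;
    // Default GEMMInfo: GEMMLowpOutputStageType::NONE, so the output stays S32.
    gemmlowp.configure(&a, &b, nullptr /* no bias */, &dst);

    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill a and b, then:
    gemmlowp.run();
    return 0;
}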
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index 6977d27cb6..0d932bb4af 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,9 @@
#define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
-/** This file contains all available output stages for GEMMLowp on NEON.
+/** This file contains all available output stages for GEMMLowp.
*
* In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyCore),
* and processes it to obtain the final ASYMM8 value.
@@ -39,237 +39,17 @@ namespace arm_compute
{
class ITensor;
class ITensorInfo;
-
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
- *
- * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
- *
- * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
- *
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete;
- /** Default destructor */
- ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint();
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON.
- *
- * NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
- *
- * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
- *
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete;
- /** Default destructor */
- ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint();
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON.
- *
- * NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
+/** Basic function to execute GEMMLowpQuantizeDown kernels.
*
- * result_fixedpoint_multiplier, result_shift
+ * This function calls the following operators:
*
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
+ * -# @ref cpu::CpuGemmLowpOutputStage
*/
-class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder
+class NEGEMMLowpOutputStage : public IFunction
{
public:
/** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete;
- /** Default destructor */
- ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint();
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
- int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- *
- * @param[in] input Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-
-/** Basic function to execute GEMMLowpQuantizeDown kernels on NEON.
- *
- * This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
-*/
-class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEGEMMLowpOutputStage() = default;
+ NEGEMMLowpOutputStage();
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -282,6 +62,16 @@ public:
~NEGEMMLowpOutputStage();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:-------------|
+ * |S32 |S32 |QASYMM8 |
+ * |S32 |S32 |QASYMM8_SIGNED|
+ * |S32 |S32 |QSYMM16 |
+ *
* @param[in] input Input tensor. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
@@ -299,7 +89,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const GEMMLowpOutputStageInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H */
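Since the per-type quantize-down classes removed above are now collapsed into the single NEGEMMLowpOutputStage (selected via GEMMLowpOutputStageInfo), a scalar model of the fixed-point requantization they described may help. This is a hedged sketch of the arithmetic in the deleted Doxygen comments, not the kernel implementation; the real kernels follow gemmlowp's exact rounding and saturation rules, and the rounding of the final result_shift is simplified here.

#include <cstdint>

// FixedPointMul(x, y) ~= round((x * y) / 2^31), computed without intermediate overflow.
static int32_t fixed_point_mul(int32_t x, int32_t multiplier)
{
    const int64_t prod     = static_cast<int64_t>(x) * static_cast<int64_t>(multiplier);
    const int64_t rounding = (prod >= 0) ? (1LL << 30) : (1 - (1LL << 30)); // round to nearest
    return static_cast<int32_t>((prod + rounding) >> 31);
}

// One S32 accumulator -> one QASYMM8 value, following
// ((FixedPointMul(acc + bias, multiplier)) >> result_shift) + result_offset_after_shift.
static uint8_t quantize_down(int32_t acc, int32_t bias, int32_t multiplier, int result_shift, int32_t offset_after_shift)
{
    int32_t v = fixed_point_mul(acc + bias, multiplier) >> result_shift; // arithmetic shift right
    v += offset_after_shift;
    if (v < 0)   v = 0;   // saturate to the QASYMM8 range [0, 255]
    if (v > 255) v = 255;
    return static_cast<uint8_t>(v);
}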
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
deleted file mode 100644
index 723a638d76..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H
-#define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-class ITensorInfo;
-
-/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels:
- *
- * -# @ref NEGEMMTranspose1xWKernel
- *
- */
-class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEGEMMTranspose1xW() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMTranspose1xW(const NEGEMMTranspose1xW &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMTranspose1xW &operator=(const NEGEMMTranspose1xW &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMTranspose1xW(NEGEMMTranspose1xW &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMTranspose1xW &operator=(NEGEMMTranspose1xW &&) = delete;
- /** Default destructor */
- ~NEGEMMTranspose1xW() = default;
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input First input tensor. Data type supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW
- *
- * @param[in] input First input tensor. Data type supported: All
- * @param[in] output Output tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h
index a5e0461227..9c7ae0134d 100644
--- a/arm_compute/runtime/NEON/functions/NEGather.h
+++ b/arm_compute/runtime/NEON/functions/NEGather.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,19 +40,26 @@ class NEGather : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis])
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result is unpredictable.
+ * @note @p axis must be in the range [0, input.rank - 1] when indices is a vector, and must be 1 when indices is a 2D or 3D tensor.
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ *
*/
void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis])
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ * Similar to @ref NEGather::configure()
*
* @return a status
*/
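A short sketch of the simplest case documented above: rank-1 U32 indices gathering along axis 0 (the innermost dimension, so the axis-0 extent of the output becomes the number of indices). Shapes are illustrative assumptions.

#include "arm_compute/runtime/NEON/functions/NEGather.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, indices, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 5U), 1, DataType::F32));
    indices.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::U32));
    dst.allocator()->init(TensorInfo(TensorShape(3U, 5U), 1, DataType::F32)); // axis-0 extent replaced by index count

    NEGather gather;
    gather.configure(&src, &indices, &dst, 0 /* axis */);

    src.allocator()->allocate();
    indices.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, and fill indices with values in [0, 8) ...
    gather.run();
    return 0;
}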
diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h
deleted file mode 100644
index 9341c76d85..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN3x3_H
-#define ARM_COMPUTE_NEGAUSSIAN3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussian3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEGaussian3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's input, output and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data type supported: U8.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEGAUSSIAN3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
deleted file mode 100644
index 51ebee3e8e..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H
-#define ARM_COMPUTE_NEGAUSSIAN5x5_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEGaussian5x5HorKernel;
-class NEGaussian5x5VertKernel;
-class NEFillBorderKernel;
-
-/** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussian5x5HorKernel
- * -# @ref NEGaussian5x5VertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEGaussian5x5 : public IFunction
-{
-public:
- /** Default constructor
- */
- NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussian5x5(const NEGaussian5x5 &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussian5x5 &operator=(const NEGaussian5x5 &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussian5x5(NEGaussian5x5 &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussian5x5 &operator=(NEGaussian5x5 &&) = default;
- /** Default destructor */
- ~NEGaussian5x5();
- /** Initialise the function's input, output and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data type supported: U8.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function memory group */
- std::unique_ptr<NEGaussian5x5HorKernel> _kernel_hor; /**< kernel for horizontal pass */
- std::unique_ptr<NEGaussian5x5VertKernel> _kernel_vert; /**< kernel for vertical pass */
- Tensor _tmp; /**< temporary buffer for output of horizontal pass */
- std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel to handle tensor borders */
-};
-}
-#endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h
deleted file mode 100644
index f5a1272b53..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMID_H
-#define ARM_COMPUTE_NEGAUSSIANPYRAMID_H
-
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
-#include "arm_compute/runtime/NEON/functions/NEScale.h"
-#include "arm_compute/runtime/Pyramid.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEGaussianPyramidHorKernel;
-class NEGaussianPyramidVertKernel;
-class NEFillBorderKernel;
-
-/** Common interface for all Gaussian pyramid functions
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEGaussianPyramid : public IFunction
-{
-public:
- /** Default constructor */
- NEGaussianPyramid();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramid(const NEGaussianPyramid &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramid(NEGaussianPyramid &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramid &operator=(NEGaussianPyramid &&) = default;
- /** Default destructor */
- virtual ~NEGaussianPyramid() = default;
-
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: U8.
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0;
-
-protected:
- const ITensor *_input;
- IPyramid *_pyramid;
- Pyramid _tmp;
-};
-
-/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussianPyramidHorKernel
- * -# @ref NEGaussianPyramidVertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- *
- */
-class NEGaussianPyramidHalf : public NEGaussianPyramid
-{
-public:
- /** Constructor */
- NEGaussianPyramidHalf();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidHalf(const NEGaussianPyramidHalf &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidHalf &operator=(const NEGaussianPyramidHalf &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidHalf(NEGaussianPyramidHalf &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidHalf &operator=(NEGaussianPyramidHalf &&) = default;
- /** Default destructor */
- ~NEGaussianPyramidHalf();
-
- // Inherited methods overridden:
- void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void run() override;
-
-private:
- std::vector<std::unique_ptr<NEFillBorderKernel>> _horizontal_border_handler;
- std::vector<std::unique_ptr<NEFillBorderKernel>> _vertical_border_handler;
- std::vector<std::unique_ptr<NEGaussianPyramidHorKernel>> _horizontal_reduction;
- std::vector<std::unique_ptr<NEGaussianPyramidVertKernel>> _vertical_reduction;
-};
-
-/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEGaussian5x5
- * -# @ref NEScaleKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- *
- */
-class NEGaussianPyramidOrb : public NEGaussianPyramid
-{
-public:
- /** Constructor */
- NEGaussianPyramidOrb();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidOrb(const NEGaussianPyramidOrb &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidOrb &operator=(const NEGaussianPyramidOrb &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidOrb(NEGaussianPyramidOrb &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidOrb &operator=(NEGaussianPyramidOrb &&) = default;
- /** Default destructor */
- ~NEGaussianPyramidOrb();
-
- // Inherited methods overridden:
- void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void run() override;
-
-private:
- std::vector<NEGaussian5x5> _gaus5x5;
- std::vector<NEScale> _scale_nearest;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMID_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
index 613f0d1c47..0f294fde22 100644
--- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,7 +30,6 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
-#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
@@ -41,17 +40,18 @@
namespace arm_compute
{
class ITensor;
+class NEComputeAllAnchorsKernel;
/** Basic function to generate proposals for a RPN (Region Proposal Network)
*
- * This function calls the following Neon kernels:
- * -# @ref NEComputeAllAnchors
+ * This function calls the following Arm(R) Neon(TM) layers/kernels:
+ * -# @ref NEComputeAllAnchorsKernel
* -# @ref NEPermute x 2
* -# @ref NEReshapeLayer x 2
* -# @ref NEBoundingBoxTransform
* -# @ref NEPadLayerKernel
- * -# @ref NEDequantizationLayerKernel x 2
- * -# @ref NEQuantizationLayerKernel
+ * -# @ref NEDequantizationLayer x 2
+ * -# @ref NEQuantizationLayer
* And the following CPP kernels:
* -# @ref CPPBoxWithNonMaximaSuppressionLimit
*/
@@ -72,6 +72,16 @@ public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 |
+ *
* @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors.
* Data types supported: QASYMM8/F16/F32
* @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
@@ -85,7 +95,12 @@ public:
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct.
* @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid.
*/
- void configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
+ void configure(const ITensor *scores,
+ const ITensor *deltas,
+ const ITensor *anchors,
+ ITensor *proposals,
+ ITensor *scores_out,
+ ITensor *num_valid_proposals,
const GenerateProposalsInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer
@@ -102,7 +117,11 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
+ static Status validate(const ITensorInfo *scores,
+ const ITensorInfo *deltas,
+ const ITensorInfo *anchors,
+ const ITensorInfo *proposals,
+ const ITensorInfo *scores_out,
const ITensorInfo *num_valid_proposals,
const GenerateProposalsInfo &info);
@@ -113,17 +132,17 @@ private:
// Memory group manager
MemoryGroup _memory_group;
- // Neon kernels
- NEPermute _permute_deltas;
- NEReshapeLayer _flatten_deltas;
- NEPermute _permute_scores;
- NEReshapeLayer _flatten_scores;
- NEComputeAllAnchors _compute_anchors;
- NEBoundingBoxTransform _bounding_box;
- NEPadLayer _pad;
- NEDequantizationLayer _dequantize_anchors;
- NEDequantizationLayer _dequantize_deltas;
- NEQuantizationLayer _quantize_all_proposals;
+ // kernels/layers
+ NEPermute _permute_deltas;
+ NEReshapeLayer _flatten_deltas;
+ NEPermute _permute_scores;
+ NEReshapeLayer _flatten_scores;
+ std::unique_ptr<NEComputeAllAnchorsKernel> _compute_anchors;
+ NEBoundingBoxTransform _bounding_box;
+ NEPadLayer _pad;
+ NEDequantizationLayer _dequantize_anchors;
+ NEDequantizationLayer _dequantize_deltas;
+ NEQuantizationLayer _quantize_all_proposals;
// CPP functions
CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
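For the proposal-generation pipeline above, a hedged configuration sketch follows. The tensors are assumed to be pre-initialized with the shapes and data types from the table in the configure() docs, and the GenerateProposalsInfo constructor arguments (image size, scales, NMS settings) are illustrative assumptions whose order should be checked against the GenerateProposalsInfo definition in your ACL version.

#include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h"

using namespace arm_compute;

void build_proposals(ITensor &scores, ITensor &deltas, ITensor &anchors,
                     ITensor &proposals, ITensor &scores_out, ITensor &num_valid)
{
    // Illustrative values: 800x600 image, 1/16 feature-map stride, keep 300 boxes after NMS.
    const GenerateProposalsInfo info(800.f /* im_width */, 600.f /* im_height */, 1.f /* im_scale */,
                                     1.f / 16.f /* spatial_scale */, 6000 /* pre_nms_topN */,
                                     300 /* post_nms_topN */, 0.7f /* nms_thres */, 16.f /* min_size */);

    NEGenerateProposalsLayer proposals_fn;
    proposals_fn.configure(&scores, &deltas, &anchors, &proposals, &scores_out, &num_valid, info);
    proposals_fn.run();
}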
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h
deleted file mode 100644
index a2d42fedf8..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H
-#define ARM_COMPUTE_NEHOGDESCRIPTOR_H
-
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class IHOG;
-class NEHOGOrientationBinningKernel;
-class NEHOGBlockNormalizationKernel;
-
-/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels:
- *
- * -# @ref NEHOGGradient
- * -# @ref NEHOGOrientationBinningKernel
- * -# @ref NEHOGBlockNormalizationKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEHOGDescriptor : public IFunction
-{
-public:
- /** Default constructor */
- NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGDescriptor(const NEHOGDescriptor &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGDescriptor &operator=(const NEHOGDescriptor &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGDescriptor(NEHOGDescriptor &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGDescriptor &operator=(NEHOGDescriptor &&) = delete;
- /** Default destructor */
- ~NEHOGDescriptor();
- /** Initialise the function's source, destination, HOG data-object and border mode
- *
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block
- * @param[in] hog HOG data object which describes the HOG descriptor
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEHOGGradient _gradient;
- std::unique_ptr<NEHOGOrientationBinningKernel> _orient_bin;
- std::unique_ptr<NEHOGBlockNormalizationKernel> _block_norm;
- Tensor _mag;
- Tensor _phase;
- Tensor _hog_space;
-};
-}
-
-#endif /* ARM_COMPUTE_NEHOGDESCRIPTOR_H */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
deleted file mode 100644
index 644851ee92..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDETECTOR_H
-#define ARM_COMPUTE_NEHOGDETECTOR_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ITensorInfo;
-/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel:
- *
- * -# @ref NEHOGDetectorKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEHOGDetector : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEHOGDetector() = default;
- /** Prevent instances of this class from being copied */
- NEHOGDetector(const NEHOGDetector &) = delete;
- /** Default move constructor */
- NEHOGDetector(NEHOGDetector &&) = default;
- /** Prevent instances of this class from being copied */
- NEHOGDetector &operator=(const NEHOGDetector &) = delete;
- /** Default move assignment operator */
- NEHOGDetector &operator=(NEHOGDetector &&) = default;
- /** Destructor */
- ~NEHOGDetector();
- /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
- *
- * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is the caller's responsibility to clear it.
- *
- * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32
- * @param[in] hog HOG data-object that describes the HOG descriptor
- * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be a multiple of the block stride stored in hog
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEHOGDETECTOR_H */
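
A hedged sketch of how the detector above was driven, mirroring the @attention note on clearing the window array. The array capacity and the 8x8 window stride are placeholders; the stride must be a multiple of the block stride stored in the HOG data-object.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"

using namespace arm_compute;

// 'descriptor' is the F32 output of NEHOGDescriptor; 'hog' is the same data-object
// used to compute it and carries the trained linear SVM coefficients.
void detect_objects(const ITensor *descriptor, const IHOG *hog)
{
    DetectionWindowArray windows(100);
    NEHOGDetector detector;
    detector.configure(descriptor, hog, &windows, Size2D(8, 8));
    windows.clear(); // configure() does not reset the array; the caller must.
    detector.run();
    // windows.num_values() detection windows are now stored in the array.
}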
diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h
deleted file mode 100644
index 426bc4b23c..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGGRADIENT_H
-#define ARM_COMPUTE_NEHOGGRADIENT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class ICPPKernel;
-
-/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels:
- *
- * -# @ref NEDerivative
- * -# NEMagnitudePhaseKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEHOGGradient : public IFunction
-{
-public:
- /** Default constructor */
- NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGGradient(const NEHOGGradient &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGGradient &operator=(const NEHOGGradient &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGGradient(NEHOGGradient &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGGradient &operator=(NEHOGGradient &&) = delete;
- /** Default destructor */
- ~NEHOGGradient();
- /** Initialise the function's source, destinations, phase type and border mode
- *
- * @param[in, out] input Input tensor. Data type supported: U8.
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16.
- * @param[out] output_phase Output tensor (phase). Data type supported: U8
- * @param[in] phase_type Phase type, see @ref PhaseType
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEDerivative _derivative;
- std::unique_ptr<ICPPKernel> _mag_phase;
- Tensor _gx;
- Tensor _gy;
-};
-}
-#endif /*ARM_COMPUTE_NEHOGGRADIENT_H */
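
The gradient stage is self-contained enough for a complete sketch. The tensor size is a placeholder; the output types follow the parameter documentation above.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, magnitude, phase;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
    magnitude.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U16));
    phase.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

    NEHOGGradient gradient;
    gradient.configure(&src, &magnitude, &phase, PhaseType::UNSIGNED, BorderMode::REPLICATE);

    // Allocate once the function has been configured.
    src.allocator()->allocate();
    magnitude.allocator()->allocate();
    phase.allocator()->allocate();
    // Fill 'src' with image data here, then:
    gradient.run();
    return 0;
}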
diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
deleted file mode 100644
index f370dd29ab..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGMULTIDETECTION_H
-#define ARM_COMPUTE_NEHOGMULTIDETECTION_H
-
-#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/IMultiHOG.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
-#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class NEHOGOrientationBinningKernel;
-class NEHOGBlockNormalizationKernel;
-
-/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels:
- *
- * -# @ref NEHOGGradient
- * -# @ref NEHOGOrientationBinningKernel
- * -# @ref NEHOGBlockNormalizationKernel
- * -# @ref NEHOGDetector
- * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
- *
- * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
- * -# Phase type
- * -# Normalization type
- * -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEHOGMultiDetection : public IFunction
-{
-public:
- /** Default constructor */
- NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGMultiDetection(const NEHOGMultiDetection &) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NEHOGMultiDetection(NEHOGMultiDetection &&) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NEHOGMultiDetection &operator=(NEHOGMultiDetection &&) = delete;
- /** Default destructor */
- ~NEHOGMultiDetection();
- /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
- *
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[in] multi_hog Container of multiple HOG data-objects. Each HOG data-object describes one HOG model to detect.
- * This container should store the HOG data-objects in descending or ascending cell_size width order,
- * so that the function can determine whether the HOG descriptor computation can be skipped for some HOG data-objects
- * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects
- * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
- * The dimension of this array must be the same as multi_hog->num_models()
- * The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog model
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not.
- * True if the non-maxima suppression stage has to be computed
- * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
- *
- */
- void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode,
- uint8_t constant_border_value = 0,
- float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEHOGGradient _gradient_kernel;
- std::vector<NEHOGOrientationBinningKernel> _orient_bin_kernel;
- std::vector<NEHOGBlockNormalizationKernel> _block_norm_kernel;
- std::vector<NEHOGDetector> _hog_detect_kernel;
- CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
- std::vector<Tensor> _hog_space;
- std::vector<Tensor> _hog_norm_space;
- IDetectionWindowArray *_detection_windows;
- Tensor _mag;
- Tensor _phase;
- bool _non_maxima_suppression;
- size_t _num_orient_bin_kernel;
- size_t _num_block_norm_kernel;
- size_t _num_hog_detect_kernel;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHOGMULTIDETECTION_H */
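
A sketch of the multi-model entry point, with the heavyweight inputs taken as parameters since building an IMultiHOG container is outside the scope of this header. The window-array capacity and all optional arguments are placeholder values.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h"

using namespace arm_compute;

// 'strides' holds one Size2D per model, each a multiple of that model's block stride.
void multi_detect(ITensor *src, const IMultiHOG *multi_hog, const ISize2DArray *strides)
{
    DetectionWindowArray windows(256);
    NEHOGMultiDetection detection;
    detection.configure(src, multi_hog, &windows, strides, BorderMode::REPLICATE,
                        0 /* constant border */, 0.f /* threshold */,
                        true /* non-maxima suppression */, 1.f /* min distance */);
    detection.run();
}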
diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
deleted file mode 100644
index 477b843aee..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHARRISCORNERS_H
-#define ARM_COMPUTE_NEHARRISCORNERS_H
-
-#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEFillBorderKernel;
-class INEHarrisScoreKernel;
-using IImage = ITensor;
-
-/** Basic function to execute Harris corners detection. This function calls the following NEON kernels and functions:
- *
- * -# @ref NESobel3x3 (if gradient_size == 3) or<br/>
- * @ref NESobel5x5 (if gradient_size == 5) or<br/>
- * @ref NESobel7x7 (if gradient_size == 7)
- * -# @ref NEFillBorderKernel
- * -# NEHarrisScoreKernel<3> (if block_size == 3) or<br/>
- * NEHarrisScoreKernel<5> (if block_size == 5) or<br/>
- * NEHarrisScoreKernel<7> (if block_size == 7)
- * -# @ref NENonMaximaSuppression3x3
- * -# @ref CPPCornerCandidatesKernel
- * -# @ref CPPSortEuclideanDistanceKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEHarrisCorners : public IFunction
-{
-public:
- /** Constructor
- *
- * Initialize _sobel, _harris_score and _corners_list to nullptr.
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHarrisCorners(const NEHarrisCorners &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHarrisCorners &operator=(const NEHarrisCorners &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHarrisCorners(NEHarrisCorners &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHarrisCorners &operator=(NEHarrisCorners &&) = delete;
- /** Default destructor */
- ~NEHarrisCorners();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] min_dist Radial Euclidean distance for the Euclidean distance stage
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7
- * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(IImage *input, float threshold, float min_dist, float sensitivity,
- int32_t gradient_size, int32_t block_size, KeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Sobel function */
- std::unique_ptr<INEHarrisScoreKernel> _harris_score; /**< Harris score kernel */
- NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
- CPPCornerCandidatesKernel _candidates; /**< Corner candidates kernel */
- CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Sort Euclidean distance kernel */
- std::unique_ptr<NEFillBorderKernel> _border_gx; /**< Border handler before running harris score */
- std::unique_ptr<NEFillBorderKernel> _border_gy; /**< Border handler before running harris score */
- Image _gx; /**< Source image - Gx component */
- Image _gy; /**< Source image - Gy component */
- Image _score; /**< Source image - Harris score */
- Image _nonmax; /**< Source image - Non-Maxima suppressed image */
- std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
- int32_t _num_corner_candidates; /**< Number of potential corner candidates */
-};
-}
-#endif /*ARM_COMPUTE_NEHARRISCORNERS_H */
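
A runnable sketch of the corner detector; the threshold, minimum distance and sensitivity values are untuned placeholders, as is the keypoint-array capacity.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

    KeyPointArray corners(1000); // maximum number of corners to keep

    NEHarrisCorners harris;
    harris.configure(&src, 1e-5f /* threshold */, 5.f /* min_dist */, 0.04f /* sensitivity */,
                     3 /* gradient_size */, 3 /* block_size */, &corners, BorderMode::UNDEFINED);

    src.allocator()->allocate();
    // Fill 'src' with image data here, then:
    harris.run();
    // corners.num_values() keypoints are now available.
    return 0;
}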
diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h
deleted file mode 100644
index d922ef1214..0000000000
--- a/arm_compute/runtime/NEON/functions/NEHistogram.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHISTOGRAM_H
-#define ARM_COMPUTE_NEHISTOGRAM_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class ITensor;
-class IDistribution1D;
-class NEHistogramKernel;
-using IImage = ITensor;
-
-/** Basic function to run @ref NEHistogramKernel.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEHistogram : public IFunction
-{
-public:
- /** Default Constructor. */
- NEHistogram();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHistogram(const NEHistogram &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHistogram &operator=(const NEHistogram &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHistogram(NEHistogram &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHistogram &operator=(NEHistogram &&) = delete;
- /** Default destructor */
- ~NEHistogram();
- /** Initialise the kernel's inputs.
- *
- * @param[in] input Input image. Data type supported: U8.
- * @param[out] output Output distribution.
- */
- void configure(const IImage *input, IDistribution1D *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEHistogramKernel> _histogram_kernel;
- std::vector<uint32_t> _local_hist;
- std::vector<uint32_t> _window_lut;
- size_t _local_hist_size;
- /** 256 possible pixel values as we handle only U8 images */
- static constexpr unsigned int window_lut_default_size = 256;
-};
-}
-#endif /*ARM_COMPUTE_NEHISTOGRAM_H */
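
The histogram function needs only an image and a distribution, so a complete sketch is short. The image size is a placeholder; 256 bins over [0, 256) cover the full U8 range, matching window_lut_default_size above.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Distribution1D.h"
#include "arm_compute/runtime/NEON/functions/NEHistogram.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

    Distribution1D histogram(256 /* bins */, 0 /* offset */, 256 /* range */);

    NEHistogram hist;
    hist.configure(&src, &histogram);

    src.allocator()->allocate();
    // Fill 'src' with image data here, then:
    hist.run();
    // 'histogram' now holds the 256 bin counts.
    return 0;
}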
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
deleted file mode 100644
index 2f023f44fe..0000000000
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEIM2COL_H
-#define ARM_COMPUTE_NEIM2COL_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Types.h"
-#include <memory>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-class ITensorInfo;
-class NEIm2ColKernel;
-
-/** Basic function to run @ref NEIm2ColKernel */
-class NEIm2Col : public IFunction
-{
-public:
- /** Default constructor */
- NEIm2Col();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIm2Col(const NEIm2Col &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIm2Col &operator=(const NEIm2Col &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEIm2Col(NEIm2Col &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEIm2Col &operator=(NEIm2Col &&) = delete;
- /** Default destructor */
- ~NEIm2Col();
- /** Configure the im2col NEON kernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided, expands the matrix with an extra entry set to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided, expands the matrix with an extra entry set to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEIm2ColKernel> _kernel;
- unsigned int _y_dim;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEIM2COL_H */
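
A sketch pairing validate() with configure() as documented above. It assumes the usual library behaviour of auto-initialising an output tensor whose info is still empty; the shape, kernel size and padding are placeholders.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEIm2Col.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // One 32x32 F32 input with 16 channels; 3x3 kernel, stride 1, pad 1.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
    // 'dst' is left uninitialised: configure() infers and sets its shape.

    const Size2D        kernel_dims(3, 3);
    const PadStrideInfo conv_info(1, 1, 1, 1);

    if(NEIm2Col::validate(src.info(), dst.info(), kernel_dims, conv_info, false).error_code() == ErrorCode::OK)
    {
        NEIm2Col im2col;
        im2col.configure(&src, &dst, kernel_dims, conv_info, false /* has_bias */);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        im2col.run();
    }
    return 0;
}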
diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
index 57165c94b4..0bc57be09e 100644
--- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,16 @@ public:
~NEInstanceNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
* Data types supported: F16/F32. Data layout supported: NHWC, NCHW
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
@@ -79,7 +89,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ float gamma = 1.0f,
+ float beta = 0.0f,
+ float epsilon = 1e-12f);
// Inherited methods overridden:
void run() override;
@@ -93,5 +107,5 @@ private:
Tensor _permuted_input;
Tensor _permuted_output;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */
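
A short usage sketch, assuming configure() mirrors the validate() signature shown in this hunk; the NCHW shape [W, H, C, N] and the gamma/beta values are placeholders.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 32U, 4U), 1, DataType::F32));

    NEInstanceNormalizationLayer inorm;
    inorm.configure(&src, &dst, 1.f /* gamma */, 0.f /* beta */); // default epsilon

    src.allocator()->allocate();
    dst.allocator()->allocate();
    inorm.run();
    return 0;
}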
diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h
deleted file mode 100644
index 31c0ec9ebe..0000000000
--- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEINTEGRALIMAGE_H
-#define ARM_COMPUTE_NEINTEGRALIMAGE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run a @ref NEIntegralImageKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEIntegralImage : public INESimpleFunction
-{
-public:
- /** Constructor */
- NEIntegralImage() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIntegralImage(const NEIntegralImage &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIntegralImage &operator=(const NEIntegralImage &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEIntegralImage(NEIntegralImage &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEIntegralImage &operator=(NEIntegralImage &&) = delete;
- /** Default destructor */
- ~NEIntegralImage();
- /** Initialise the function's source and destination.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: U32.
- */
- void configure(const ITensor *input, ITensor *output);
-};
-}
-#endif /*ARM_COMPUTE_NEINTEGRALIMAGE_H */
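
The integral image function is a one-liner to drive; a complete sketch follows, with an arbitrary image size. Note the U8 source and U32 destination from the documentation above.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));
    dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U32));

    NEIntegralImage integral;
    integral.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // Fill 'src' with image data here, then:
    integral.run();
    return 0;
}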
diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
index 173b9d2141..8502cee5d2 100644
--- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,16 @@ public:
~NEL2NormalizeLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0)
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
* @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
@@ -87,5 +97,5 @@ private:
std::unique_ptr<NEL2NormalizeLayerKernel> _normalize_kernel;
Tensor _sumsq;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */
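
A usage sketch, assuming the configure() overload takes the axis and epsilon parameters documented in this header; the 2D shape and the X-axis choice are placeholders.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 24U), 1, DataType::F32));

    NEL2NormalizeLayer l2norm;
    l2norm.configure(&src, &dst, 0 /* axis: X */); // default epsilon

    src.allocator()->allocate();
    dst.allocator()->allocate();
    l2norm.run();
    return 0;
}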
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index ef8defb827..629c5d10a0 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,8 @@
#define ARM_COMPUTE_NELSTMLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
@@ -35,7 +36,6 @@
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
namespace arm_compute
{
@@ -60,6 +60,15 @@ public:
~NELSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src13 | dst0 - dst3 |
+ * |:------------|:------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
@@ -95,13 +104,26 @@ public:
* @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
* If set to 0.0 then clipping is disabled.
*/
- void configure(const ITensor *input,
- const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- const ITensor *output_state_in, const ITensor *cell_state_in,
- ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output,
- const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+ void configure(const ITensor *input,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ const ITensor *output_state_in,
+ const ITensor *cell_state_in,
+ ITensor *scratch_buffer,
+ ITensor *output_state_out,
+ ITensor *cell_state_out,
+ ITensor *output,
+ const LSTMParams<ITensor> &lstm_params,
+ const ActivationLayerInfo &activation_info,
+ float cell_threshold = 0.f,
+ float projection_threshold = 0.f);
/** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
*
@@ -142,13 +164,26 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
- const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
- const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *scratch_buffer,
+ const ITensorInfo *output_state_out,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output,
+ const LSTMParams<ITensorInfo> &lstm_params,
+ const ActivationLayerInfo &activation_info,
+ float cell_threshold = 0.f,
+ float projection_threshold = 0.f);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
index a354a4df7b..ae951669b3 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
@@ -38,8 +39,6 @@
#include "arm_compute/runtime/NEON/functions/NESlice.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
-
namespace arm_compute
{
// Forward declarations
@@ -47,10 +46,10 @@ class ITensor;
/** Basic function to run @ref NELSTMLayerQuantized
*
- * This function calls the following NEON functions/kernels:
+ * This function calls the following functions/kernels:
*
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
+ * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16
* -# @ref NETranspose Matrix transpose
* -# @ref NEConcatenateLayer Tensor concatenation
* -# @ref NEActivationLayer Activation functions (tanh and logistic)
@@ -77,6 +76,14 @@ public:
~NELSTMLayerQuantized();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 |
+ * |:-----------|:------------|:-------|:------|:------|:------|
+ * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8|
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
* @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
@@ -96,11 +103,22 @@ public:
* @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input.
*/
void configure(const ITensor *input,
- const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- ITensor *cell_state_in, const ITensor *output_state_in,
- ITensor *cell_state_out, ITensor *output_state_out);
+ const ITensor *input_to_input_weights,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_input_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *input_gate_bias,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ ITensor *cell_state_in,
+ const ITensor *output_state_in,
+ ITensor *cell_state_out,
+ ITensor *output_state_out);
/** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
*
@@ -125,11 +143,22 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
- const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out);
+ const ITensorInfo *input_to_input_weights,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_input_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *input_gate_bias,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output_state_out);
// Inherited methods overridden:
void run() override;
@@ -139,30 +168,30 @@ private:
MemoryGroup _memory_group;
// Functions used
- NEGEMMLowpMatrixMultiplyCore _gemmlowp;
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage;
- NETranspose _transpose_weights;
- NEConcatenateLayer _concat_input_weights;
- NEConcatenateLayer _concat_recurrent_weights;
- NEConcatenateLayer _concat_weights;
- NEConcatenateLayer _concat_inputs;
- NEConcatenateLayer _concat_bias;
- NEActivationLayer _sigmoid_forget_gate;
- NEActivationLayer _sigmoid_input_gate;
- NEActivationLayer _sigmoid_output_gate;
- NEActivationLayer _tanh_modulation_gate;
- NEActivationLayer _tanh_output_state;
- NEArithmeticAddition _add1;
- NEArithmeticAddition _add2;
- NEPixelWiseMultiplication _mul1;
- NEPixelWiseMultiplication _mul2;
- NEPixelWiseMultiplication _mul3;
- NESlice _slice_input_tensor;
- NESlice _slice_forget_tensor;
- NESlice _slice_cell_tensor;
- NESlice _slice_output_tensor;
- NEDequantizationLayer _dequantize;
- NEQuantizationLayer _quantize;
+ NEGEMMLowpMatrixMultiplyCore _gemmlowp;
+ NEGEMMLowpOutputStage _output_stage;
+ NETranspose _transpose_weights;
+ NEConcatenateLayer _concat_input_weights;
+ NEConcatenateLayer _concat_recurrent_weights;
+ NEConcatenateLayer _concat_weights;
+ NEConcatenateLayer _concat_inputs;
+ NEConcatenateLayer _concat_bias;
+ NEActivationLayer _sigmoid_forget_gate;
+ NEActivationLayer _sigmoid_input_gate;
+ NEActivationLayer _sigmoid_output_gate;
+ NEActivationLayer _tanh_modulation_gate;
+ NEActivationLayer _tanh_output_state;
+ NEArithmeticAddition _add1;
+ NEArithmeticAddition _add2;
+ NEPixelWiseMultiplication _mul1;
+ NEPixelWiseMultiplication _mul2;
+ NEPixelWiseMultiplication _mul3;
+ NESlice _slice_input_tensor;
+ NESlice _slice_forget_tensor;
+ NESlice _slice_cell_tensor;
+ NESlice _slice_output_tensor;
+ NEDequantizationLayer _dequantize;
+ NEQuantizationLayer _quantize;
// Tensor pointers
const ITensor *_input_to_input_weights;
diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h
deleted file mode 100644
index 9ca30141a6..0000000000
--- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELAPLACIANPYRAMID_H
-#define ARM_COMPUTE_NELAPLACIANPYRAMID_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
-#include "arm_compute/runtime/Pyramid.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute laplacian pyramid. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEGaussianPyramidHalf
- * -# @ref NEGaussian5x5
- * -# @ref NEArithmeticSubtraction
- *
- * First, a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and the
- * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid:
- * L(i) = I(i) - Gaussian5x5(I(i))
- * Level 0 always has the same first two dimensions as the input tensor.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NELaplacianPyramid : public IFunction
-{
-public:
- /** Constructor */
- NELaplacianPyramid();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELaplacianPyramid(const NELaplacianPyramid &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELaplacianPyramid &operator=(const NELaplacianPyramid &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NELaplacianPyramid(NELaplacianPyramid &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NELaplacianPyramid &operator=(NELaplacianPyramid &&) = delete;
- /** Default destructor */
- ~NELaplacianPyramid();
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: S16.
- * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16.
- * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
- * out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1)
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- size_t _num_levels;
- NEGaussianPyramidHalf _gaussian_pyr_function;
- std::vector<NEGaussian5x5> _convf;
- std::vector<NEArithmeticSubtraction> _subf;
- Pyramid _gauss_pyr;
- Pyramid _conv_pyr;
- NEDepthConvertLayer _depth_function;
-};
-}
-#endif /*ARM_COMPUTE_NELAPLACIANPYRAMID_H */
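
A sketch tying the formula above to the API: four levels halve the resolution three times, so the lowest-resolution output is in/8 in each dimension. The image size is a placeholder, and Pyramid::allocate() is assumed to allocate every level.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h"
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const size_t num_levels = 4;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));
    // Lowest-resolution output: in / 2^(num_levels - 1) in each dimension.
    dst.allocator()->init(TensorInfo(TensorShape(80U, 60U), Format::S16));

    Pyramid pyramid;
    pyramid.init(PyramidInfo(num_levels, SCALE_PYRAMID_HALF, src.info()->tensor_shape(), Format::S16));

    NELaplacianPyramid laplacian;
    laplacian.configure(&src, &pyramid, &dst, BorderMode::REPLICATE, 0);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    pyramid.allocate();
    // Fill 'src' with image data here, then:
    laplacian.run();
    return 0;
}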
diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h
deleted file mode 100644
index 8e0a3efff0..0000000000
--- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H
-#define ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEScale.h"
-#include "arm_compute/runtime/Pyramid.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEArithmeticAddition
- * -# @ref NEScale
- * -# @ref NEDepthConvertLayer
- *
- * This function reconstructs the original image from a Laplacian Image Pyramid.
- *
- * The input image is added to the last level of the Laplacian pyramid L(n-1), and the resulting image is upsampled to the
- * resolution of the next pyramid level:
- *
- * I(n-2) = upsample(input + L(n-1))
- *
- * For each pyramid level i, except i=0 and i=n-1:
- * I(i-1) = upsample(I(i) + L(i))
- *
- * output = I(0) + L(0)
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NELaplacianReconstruct : public IFunction
-{
-public:
- /** Constructor */
- NELaplacianReconstruct();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELaplacianReconstruct(const NELaplacianReconstruct &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELaplacianReconstruct &operator=(const NELaplacianReconstruct &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NELaplacianReconstruct(NELaplacianReconstruct &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NELaplacianReconstruct &operator=(NELaplacianReconstruct &&) = delete;
- /** Default destructor */
- ~NELaplacianReconstruct();
- /** Initialise the function's source, destinations and border mode.
- *
- * The output image must have the same size as the first level of the pyramid.
- * The input image must have the same size as the last level of the pyramid.
- *
- * The idea is to reconstruct the original high-resolution image from a low-resolution representation of it and the Laplacian pyramid.
- *
- * @param[in] pyramid Laplacian pyramid tensors, Data type supported at each level: S16.
- * @param[in] input Source tensor. Data type supported: S16.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const IPyramid *pyramid, ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- Pyramid _tmp_pyr;
- std::vector<NEArithmeticAddition> _addf;
- std::vector<NEScale> _scalef;
- NEDepthConvertLayer _depthf;
-};
-}
-#endif /*ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H */
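
The reconstruction side, sketched as the inverse of the pyramid example above: feed back the pyramid and the low-resolution output, and recover a U8 image the size of pyramid level 0.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h"
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

// 'pyramid' and 'low_res' are the outputs of NELaplacianPyramid; 'dst' is a U8
// tensor with the same width and height as pyramid level 0.
void reconstruct(const IPyramid *pyramid, ITensor *low_res, ITensor *dst)
{
    NELaplacianReconstruct rec;
    rec.configure(pyramid, low_res, dst, BorderMode::REPLICATE, 0);
    rec.run();
}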
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
deleted file mode 100644
index 86e6300130..0000000000
--- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H
-#define ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NECol2Im.h"
-#include "arm_compute/runtime/NEON/functions/NEIm2Col.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class INETensor;
-class NEWeightsReshapeKernel;
-class NELocallyConnectedMatrixMultiplyKernel;
-
-/** Basic function to compute the locally connected layer. This function calls the following NEON kernels:
- *
- * -# @ref NEWeightsReshapeKernel (executed only once for each configuration)
- * -# @ref NEIm2ColKernel
- * -# @ref NELocallyConnectedMatrixMultiplyKernel
- * -# @ref NECol2ImKernel
- */
-class NELocallyConnectedLayer : public IFunction
-{
-public:
- /** Default constructor */
- NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedLayer(const NELocallyConnectedLayer &) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NELocallyConnectedLayer(NELocallyConnectedLayer &&) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = delete;
- /** Default destructor */
- ~NELocallyConnectedLayer();
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16, F32.
- * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- ARM_COMPUTE_DEPRECATED_REL(20.11)
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedLayer
- *
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16, F32.
- * @param[in] weights Weights tensor info. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input.
- * @param[in] output Output tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEIm2Col _input_im2col;
- std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel;
- std::unique_ptr<NELocallyConnectedMatrixMultiplyKernel> _mm_kernel;
- NECol2Im _output_col2im;
- Tensor _input_im2col_reshaped;
- Tensor _weights_reshaped;
- Tensor _gemm_output;
- bool _is_prepared;
- const ITensor *_original_weights;
-};
-}
-#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NELogical.h b/arm_compute/runtime/NEON/functions/NELogical.h
index 04ffce6221..0ad23200c6 100644
--- a/arm_compute/runtime/NEON/functions/NELogical.h
+++ b/arm_compute/runtime/NEON/functions/NELogical.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,6 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/Macros.h"
#include <memory>
@@ -42,12 +41,27 @@ class NELogicalAnd : public IFunction
public:
/** Constructor */
NELogicalAnd();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalAnd(const NELogicalAnd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalAnd(NELogicalAnd &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalAnd &operator=(const NELogicalAnd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalAnd &operator=(NELogicalAnd &&) = delete;
/** Destructor */
~NELogicalAnd();
- ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalAnd)
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
@@ -77,12 +91,27 @@ class NELogicalOr : public IFunction
public:
/** Constructor */
NELogicalOr();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalOr(const NELogicalOr &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalOr(NELogicalOr &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalOr &operator=(const NELogicalOr &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalOr &operator=(NELogicalOr &&) = delete;
/** Destructor */
~NELogicalOr();
- ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalOr)
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
@@ -112,12 +141,27 @@ class NELogicalNot : public IFunction
public:
/** Constructor */
NELogicalNot();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalNot(const NELogicalNot &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalNot(NELogicalNot &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogicalNot &operator=(const NELogicalNot &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELogicalNot &operator=(NELogicalNot &&) = delete;
/** Destructor */
~NELogicalNot();
- ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalNot)
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-------------|
+ * |U8 |U8 |
+ *
* @param[in] input Input tensor. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
*/
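
Not part of the patch, but for orientation: a minimal usage sketch for the NELogicalAnd interface shown above (NELogicalOr and NELogicalNot follow the same pattern). Shapes and variable names are illustrative assumptions; the usual ACL flow is configure first, then allocate backing memory, then run.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NELogical.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void logical_and_example()
    {
        // U8 is the only data type the function accepts (see the table above)
        const TensorInfo info(TensorShape(32U, 32U), 1, DataType::U8);
        Tensor a, b, out;
        a.allocator()->init(info);
        b.allocator()->init(info);
        out.allocator()->init(info);

        NELogicalAnd logical_and; // non-copyable and non-movable after this patch
        logical_and.configure(&a, &b, &out);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        // ... fill a and b with U8 boolean data ...
        logical_and.run();
    }
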
diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h
deleted file mode 100644
index e100de2e08..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMagnitude.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMAGNITUDE_H
-#define ARM_COMPUTE_NEMAGNITUDE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEMagnitudePhaseKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEMagnitude : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEMagnitude() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMagnitude(const NEMagnitude &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMagnitude &operator=(const NEMagnitude &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMagnitude(NEMagnitude &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMagnitude &operator=(NEMagnitude &&) = delete;
- /** Default destructor */
- ~NEMagnitude();
- /** Initialise the kernel's inputs.
- *
- * @param[in] input1 First tensor input. Data type supported: S16.
- * @param[in] input2 Second tensor input. Data type supported: S16.
- * @param[out] output Output tensor. Data type supported: S16.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMAGNITUDE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h
new file mode 100644
index 0000000000..58dd7a6f6b
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMatMul.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Settings for MatMul Cpu implementation*/
+class CpuMatMulSettings
+{
+public:
+ // get fast math flag
+ bool fast_math() const
+ {
+ return _fast_math;
+ }
+ // get fixed format flag
+ bool fixed_format() const
+ {
+ return _fixed_format;
+ }
+ // Set fast math flag
+ CpuMatMulSettings &fast_math(bool fmath)
+ {
+ _fast_math = fmath;
+ return *this;
+ }
+ // Set fixed format flag
+ CpuMatMulSettings &fixed_format(bool fixed_format)
+ {
+ _fixed_format = fixed_format;
+ return *this;
+ }
+
+private:
+ bool _fast_math{false};
+ bool _fixed_format{false};
+};
+
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+class MatMulInfo;
+class Status;
+
+/** Basic function to run the following operators:
+ *
+ * -# @ref cpu::CpuMatMul
+ */
+class NEMatMul : public IFunction
+{
+public:
+ /** Constructor */
+ NEMatMul();
+ /** Destructor */
+ ~NEMatMul();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMatMul(const NEMatMul &) = delete;
+ /** Default move constructor */
+ NEMatMul(NEMatMul &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMatMul &operator=(const NEMatMul &) = delete;
+ /** Default move assignment operator */
+ NEMatMul &operator=(NEMatMul &&) = default;
+ /** Initialize the function's inputs and output
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * Valid data type configurations:
+ * |lhs |rhs |dst |
+ * |:--------------|:------------------|:--------------|
+ * |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ *
+ * @param[in] lhs Left-hand side tensor. Data types supported: F16/BFLOAT16/F32/QASYMM8_SIGNED/QASYMM8.
+ * @param[in] rhs Right-hand side tensor. Data types supported: same as @p lhs.
+ * @param[out] dst Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+ * @param[in] info Contains MatMul operation information described in @ref MatMulInfo.
+ * @param[in] settings Contains flags for function-level settings, i.e. fast math
+ * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
+ */
+ void configure(ITensor *lhs,
+ ITensor *rhs,
+ ITensor *dst,
+ const MatMulInfo &info,
+ const CpuMatMulSettings &settings,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEMatMul
+ *
+ * @param[in] lhs Left-hand side tensor info. Data types supported: F16/BFLOAT16/F32/QASYMM8_SIGNED/QASYMM8.
+ * @param[in] rhs Right-hand side tensor info. Data types supported: same as @p lhs.
+ * @param[out] dst Output tensor info to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+ * @param[in] info Contains MatMul operation information described in @ref MatMulInfo.
+ * @param[in] settings Contains flags for function-level settings, i.e. fast math
+ * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
+ *
+ * @return Status
+ */
+ static Status validate(const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const ITensorInfo *dst,
+ const MatMulInfo &info,
+ const CpuMatMulSettings &settings,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
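
For orientation (not part of the patch): a sketch of driving the new NEMatMul with CpuMatMulSettings, assuming F32 operands and illustrative shapes. ACL orders shape dimensions [width, height, ...], so lhs is [K, M], rhs is [N, K] and dst is [N, M]; the MatMulInfo include path is an assumption based on the function_info headers used above.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/function_info/MatMulInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEMatMul.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void matmul_example()
    {
        Tensor lhs, rhs, dst;
        lhs.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32)); // [K, M]
        rhs.allocator()->init(TensorInfo(TensorShape(16U, 64U), 1, DataType::F32)); // [N, K]
        dst.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::F32)); // [N, M]

        const MatMulInfo        mm_info{};                                      // defaults: no operand transposed
        const CpuMatMulSettings settings = CpuMatMulSettings().fast_math(true); // fluent setters return *this

        // Check the configuration before committing to it
        if(NEMatMul::validate(lhs.info(), rhs.info(), dst.info(), mm_info, settings).error_code() == ErrorCode::OK)
        {
            NEMatMul matmul;
            matmul.configure(&lhs, &rhs, &dst, mm_info, settings);
            lhs.allocator()->allocate();
            rhs.allocator()->allocate();
            dst.allocator()->allocate();
            matmul.run();
        }
    }
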
diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
index 5b5bb5cb78..e00fc4544f 100644
--- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,19 +26,18 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-class NEMemsetKernel;
-class NEMaxUnpoolingLayerKernel;
+class NEFill;
-/** Function to perform MaxUnpooling. This function calls the following NEON kernels:
+/** Function to perform MaxUnpooling. This function calls the following kernels:
*
- * -# @ref NEMemsetKernel
- * -# @ref NEMaxUnpoolingLayerKernel
+ * -# @ref NEFill
*/
class NEMaxUnpoolingLayer : public IFunction
{
@@ -57,6 +56,18 @@ public:
~NEMaxUnpoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Only pool size 2 is supported
*
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -76,14 +87,18 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *indices,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run() override;
private:
- std::unique_ptr<NEMemsetKernel> _memset_kernel;
- std::unique_ptr<NEMaxUnpoolingLayerKernel> _unpooling_layer_kernel;
+ std::unique_ptr<NEFill> _fill_func;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */
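
A sketch (not in the patch) of the validate-before-configure pattern against the new table: assumed NCHW shapes, U32 indices from a prior max pooling, and pool size 2 per the @note. The PoolingLayerInfo constructor taking a DataLayout is an assumption to check against the ACL version in use.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h"

    using namespace arm_compute;

    Status max_unpooling_check()
    {
        // 16x16x8 pooled activations plus their argmax indices -> 32x32x8 unpooled output
        const TensorInfo src(TensorShape(16U, 16U, 8U), 1, DataType::F32);
        const TensorInfo idx(TensorShape(16U, 16U, 8U), 1, DataType::U32);
        const TensorInfo dst(TensorShape(32U, 32U, 8U), 1, DataType::F32);

        // Pool size 2 is the only supported configuration
        const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0));
        return NEMaxUnpoolingLayer::validate(&src, &idx, &dst, pool_info);
    }
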
diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
deleted file mode 100644
index 875c3630c1..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEANSTDDEV_H
-#define ARM_COMPUTE_NEMEANSTDDEV_H
-
-#include "arm_compute/core/IMultiImage.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include <memory>
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class NEMeanStdDevKernel;
-class NEFillBorderKernel;
-
-/** Basic function to execute mean and std deviation. This function calls the following NEON kernels:
- *
- * @ref NEMeanStdDevKernel
- *
- */
-class NEMeanStdDev : public IFunction
-{
-public:
- /** Default Constructor. */
- NEMeanStdDev();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDev(const NEMeanStdDev &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDev &operator=(const NEMeanStdDev &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMeanStdDev(NEMeanStdDev &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMeanStdDev &operator=(NEMeanStdDev &&) = delete;
- /** Default destructor */
- ~NEMeanStdDev();
- /** Initialise the kernel's inputs and outputs.
- *
- * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling)
- * @param[out] mean Output average pixel value.
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- */
- void configure(IImage *input, float *mean, float *stddev = nullptr);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEMeanStdDevKernel> _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */
- std::unique_ptr<NEFillBorderKernel> _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */
- uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
-};
-}
-#endif /*ARM_COMPUTE_NEMEANSTDDEV_H */
diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
index 31e376191c..41aa81946b 100644
--- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,6 +50,16 @@ public:
~NEMeanStdDevNormalizationLayer();
/** Initialise the function's input and outputs.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr, the normalization will be performed in-place.
*
* @param[in, out] input Input tensor with 2 dimensions. Data types supported: F16/F32.
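
The in-place path from the @note above, as a short sketch (not in the patch; the shape is an assumption, one feature vector per row):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void msd_normalization_example()
    {
        Tensor x;
        x.allocator()->init(TensorInfo(TensorShape(128U, 10U), 1, DataType::F32)); // 10 vectors of length 128

        NEMeanStdDevNormalizationLayer msd_norm;
        msd_norm.configure(&x); // output omitted -> normalization happens in-place
        x.allocator()->allocate();
        // ... fill x ...
        msd_norm.run(); // each row now has zero mean and unit variance
    }
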
diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h
deleted file mode 100644
index 7e1ec905c6..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEDIAN3x3_H
-#define ARM_COMPUTE_NEMEDIAN3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute median filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEMedian3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEMedian3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data type supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEMEDIAN3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
deleted file mode 100644
index 312d1cb668..0000000000
--- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMINMAXLOCATION_H
-#define ARM_COMPUTE_NEMINMAXLOCATION_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEMinMaxKernel;
-class NEMinMaxLocationKernel;
-using IImage = ITensor;
-
-/** Basic function to execute min and max location. This function calls the following NEON kernels:
- *
- * -# NEMinMaxKernel
- * -# NEMinMaxLocationKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEMinMaxLocation : public IFunction
-{
-public:
- /** Constructor */
- NEMinMaxLocation();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLocation(const NEMinMaxLocation &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLocation &operator=(const NEMinMaxLocation &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLocation(NEMinMaxLocation &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLocation &operator=(NEMinMaxLocation &&) = delete;
- /** Default destructor */
- ~NEMinMaxLocation();
- /** Initialise the kernel's inputs and outputs.
- *
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_loc (Optional) Array of minimum value locations.
- * @param[out] max_loc (Optional) Array of maximum value locations.
- * @param[out] min_count (Optional) Number of minimum value encounters.
- * @param[out] max_count (Optional) Number of maximum value encounters.
- */
- void configure(const IImage *input, void *min, void *max,
- ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr,
- uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEMinMaxKernel> _min_max; /**< Kernel that performs min/max */
- std::unique_ptr<NEMinMaxLocationKernel> _min_max_loc; /**< Kernel that extracts min/max locations */
-};
-}
-#endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */
diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h
deleted file mode 100644
index 8642350736..0000000000
--- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONLINEARFILTER_H
-#define ARM_COMPUTE_NENONLINEARFILTER_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute non linear filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NENonLinearFilterKernel
- *
- * @note Supported mask dimensions: squares of sizes 3, 5
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NENonLinearFilter : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode,
- uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NENONLINEARFILTER_H */
diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
deleted file mode 100644
index 5b71d52e3e..0000000000
--- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H
-#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NENonMaximaSuppression3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NENonMaximaSuppression3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- * The constant value used with CONSTANT border mode is 0
- *
- * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input
- * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- *
- */
- void configure(ITensor *input, ITensor *output, BorderMode border_mode);
-};
-}
-#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H */
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index 6519f9b4e6..27e3fa674e 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NENORMALIZATIONLAYER_H
#define ARM_COMPUTE_NENORMALIZATIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
@@ -39,7 +38,7 @@ namespace arm_compute
class ITensor;
class NENormalizationLayerKernel;
-/** Basic function to compute a normalization layer. This function calls the following NEON kernels:
+/** Basic function to compute a normalization layer. This function calls the following kernels:
*
* -# @ref NEPixelWiseMultiplication
* -# @ref NEFillBorderKernel
@@ -63,6 +62,16 @@ public:
~NENormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
* and an optional 4th dimension for batch of inputs. Data type supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[out] output Destination with the same dimensions, data type, data layout and number of channels of @p input
@@ -78,16 +87,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group; /**< Function memory group */
- std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */
- NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */
- Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */
+ NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */
+ Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */
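
A sketch (not in the patch) of a cross-map, LRN-style setup for this function; the shape and NormalizationLayerInfo parameters are illustrative only:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void normalization_example()
    {
        const TensorInfo info(TensorShape(13U, 13U, 256U), 1, DataType::F32);
        Tensor src, dst;
        src.allocator()->init(info);
        dst.allocator()->init(info);

        NENormalizationLayer norm; // a shared IMemoryManager may be passed to pool the scratch tensor
        norm.configure(&src, &dst, NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f));
        src.allocator()->allocate();
        dst.allocator()->allocate();
        norm.run();
    }
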
diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h
deleted file mode 100644
index d1624ec68a..0000000000
--- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEOPTICALFLOW_H
-#define ARM_COMPUTE_NEOPTICALFLOW_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Array.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class Pyramid;
-class NELKTrackerKernel;
-
-/** Array of LK Internal Keypoints */
-using LKInternalKeypointArray = Array<NELKInternalKeypoint>;
-/** Basic function to execute optical flow. This function calls the following NEON kernels and functions:
- *
- * -# @ref NEScharr3x3
- * -# @ref NELKTrackerKernel
- *
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEOpticalFlow : public IFunction
-{
-public:
- /** Constructor
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEOpticalFlow(const NEOpticalFlow &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEOpticalFlow &operator=(const NEOpticalFlow &) = delete;
- /** Default destructor */
- ~NEOpticalFlow();
- /** Initialise the function input and output
- *
- * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported: U8
- * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data type supported: U8
- * @param[in] old_points Pointer to the IKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points
- * @param[out] new_points Pointer to the IKeyPointArray storing new key points
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] border_mode The border mode applied at scharr kernel stage
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT
- *
- */
- void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates,
- IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension,
- bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::vector<NEScharr3x3> _func_scharr;
- std::vector<std::unique_ptr<NELKTrackerKernel>> _kernel_tracker;
- std::vector<Tensor> _scharr_gx;
- std::vector<Tensor> _scharr_gy;
- IKeyPointArray *_new_points;
- const IKeyPointArray *_new_points_estimates;
- const IKeyPointArray *_old_points;
- LKInternalKeypointArray _new_points_internal;
- LKInternalKeypointArray _old_points_internal;
- unsigned int _num_levels;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEOPTICALFLOW_H */
diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
index 358e633000..81d5fd162c 100644
--- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,42 +26,15 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/INEOperator.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-namespace experimental
-{
-/** Basic function to run @ref NEArithmeticOperationKernel for PRELU
- *
- * @note The function implements an activation layer with the PRELU activation function.
- */
-class NEPRelu : public INEOperator
-{
-public:
- /** Set the input and output tensor.
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] alpha Source alpha tensor info. Data types supported: same as @p input.
- * @param[out] output Destination tensor info. Data type supported: same as @p input
- */
- void configure(const ITensorInfo *input, const ITensorInfo *alpha, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] alpha Source alpha tensor info. Data types supported: same as @p input.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
-};
-} // namespace experimental
-
-/** Basic function to run @ref NEArithmeticOperationKernel for PRELU
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for PRELU
*
* @note The function implements an activation layer with the PRELU activation function.
*/
@@ -82,6 +55,17 @@ public:
NEPReluLayer &operator=(NEPReluLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] alpha Source alpha tensor. Data types supported: same as @p input.
* @param[out] output Destination tensor. Data type supported: same as @p input
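
With the experimental operator removed, NEPReluLayer is the single public entry point. A sketch (not in the patch), assuming a per-channel alpha that broadcasts against the input:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void prelu_example()
    {
        Tensor x, alpha, y;
        x.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        alpha.allocator()->init(TensorInfo(TensorShape(1U, 1U, 16U), 1, DataType::F32)); // one slope per channel
        y.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        NEPReluLayer prelu;
        prelu.configure(&x, &alpha, &y);
        x.allocator()->allocate();
        alpha.allocator()->allocate();
        y.allocator()->allocate();
        prelu.run(); // y = x where x > 0, alpha * x otherwise
    }
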
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index 3fdbb0d73c..494b1c0641 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,26 +24,26 @@
#ifndef ARM_COMPUTE_NEPADLAYER_H
#define ARM_COMPUTE_NEPADLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/runtime/SubTensor.h"
-
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
+
#include <memory>
namespace arm_compute
{
-class NECopyKernel;
class NEPadLayerKernel;
-/** Basic function to pad a tensor. This function calls the following NEON functions/kernels:
+/** Basic function to pad a tensor. This function calls the following functions/kernels:
*
* - For padding mode = PaddingMode::CONSTANT:
* -# @ref NEPadLayerKernel
* - Otherwise:
- * -# @ref NECopyKernel
+ * -# @ref NECopy
* -# @ref NEStridedSlice
* -# @ref NEConcatenateLayer
*
@@ -65,6 +65,15 @@ public:
~NEPadLayer();
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
@@ -73,7 +82,11 @@ public:
* @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
* or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
*/
- void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+ void configure(ITensor *input,
+ ITensor *output,
+ const PaddingList &padding,
+ const PixelValue constant_value = PixelValue(),
+ const PaddingMode mode = PaddingMode::CONSTANT);
/** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer.
*
* @param[in] input Source tensor info. Data types supported: All.
@@ -86,7 +99,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &padding,
+ const PixelValue constant_value = PixelValue(),
+ const PaddingMode mode = PaddingMode::CONSTANT);
// Inherited methods overridden:
void run() override;
@@ -100,7 +117,10 @@ private:
* specifies the front and the end padding in the i-th dimension.
* @param[in] constant_value Constant value to be used for the padding
*/
- void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value);
+ void configure_constant_mode(ITensor *input,
+ ITensor *output,
+ const PaddingList &padding,
+ const PixelValue constant_value);
/** Configure functions for when reflect or symmetric padding is used.
*
* @param[in] input Source tensor. Data types supported: All.
@@ -109,7 +129,7 @@ private:
void configure_reflect_symmetric_mode(ITensor *input, ITensor *output);
private:
- std::unique_ptr<NECopyKernel> _copy_kernel;
+ NECopy _copy_function;
std::unique_ptr<NEPadLayerKernel> _pad_kernel;
PaddingMode _mode;
PaddingList _padding;
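
For orientation (not in the patch): how the PaddingList argument is consumed. Each pair holds (front, back) padding for one dimension, and the destination shape must already account for the growth; shapes here are assumptions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void pad_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
        // {1, 1} pads dimension 0 (width) front/back, {2, 2} pads dimension 1 (height)
        const PaddingList padding = { { 1, 1 }, { 2, 2 } };
        dst.allocator()->init(TensorInfo(TensorShape(34U, 36U), 1, DataType::F32));

        NEPadLayer pad;
        pad.configure(&src, &dst, padding); // defaults: zero constant value, PaddingMode::CONSTANT
        src.allocator()->allocate();
        dst.allocator()->allocate();
        pad.run();
    }
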
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index ef8854b360..2cef64764d 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,10 @@
#ifndef ARM_COMPUTE_NEPERMUTE_H
#define ARM_COMPUTE_NEPERMUTE_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
@@ -34,11 +35,32 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
-/** Basic function to run @ref NEPermuteKernel */
-class NEPermute : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuPermuteKernel */
+class NEPermute : public IFunction
{
public:
- /** Configure the permute NEON kernel
+ /** Default Constructor */
+ NEPermute();
+ /** Default Destructor */
+ ~NEPermute();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPermute(const NEPermute &) = delete;
+ /** Default move constructor */
+ NEPermute(NEPermute &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPermute &operator=(const NEPermute &) = delete;
+ /** Default move assignment operator */
+ NEPermute &operator=(NEPermute &&) = default;
+ /** Configure the permute function
+ *
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
*
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*
@@ -58,6 +80,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPERMUTE_H */
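
A sketch (not in the patch) of the reworked NEPermute converting an NCHW-ordered shape ([W, H, C] in ACL's dimension order) to NHWC ([C, W, H]). Destination dimension i takes source dimension perm[i], hence PermutationVector(2U, 0U, 1U) here; worth verifying against the version in use.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEPermute.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void permute_example()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32)); // [W, H, C] (NCHW)
        dst.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U), 1, DataType::F32)); // [C, W, H] (NHWC)

        NEPermute permute;
        permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U)); // dst dim i = src dim perm[i]
        src.allocator()->allocate();
        dst.allocator()->allocate();
        permute.run();
    }
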
diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h
deleted file mode 100644
index 1202f1878d..0000000000
--- a/arm_compute/runtime/NEON/functions/NEPhase.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPHASE_H
-#define ARM_COMPUTE_NEPHASE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ITensorInfo;
-
-/** Basic function to run @ref NEMagnitudePhaseKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEPhase : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] input1 First tensor input. Data type supported: S16.
- * @param[in] input2 Second tensor input. Data type supported: S16.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPHASE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 91cf44ff2e..3d81bf6087 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,115 +24,19 @@
#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
+#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/INEOperator.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-namespace experimental
-{
-/** Basic function to run @ref NEPixelWiseMultiplicationKernel */
-class NEPixelWiseMultiplication : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * Support: Broadcast? Scale=1/255?
- * - (U8,U8) -> U8, S16 N Y
- * - (U8,S16) -> S16 N Y
- * - (S16,U8) -> S16 N Y
- * - (S16,S16) -> S16 N Y
- * - (S32,S32) -> S32 Y N
- * - (F16,F16) -> F16 N Y
- * - (F32,F32) -> F32 Y Y
- * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y
- * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y
- *
- * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
- * For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
- *
- * @param[in, out] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
- * @param[in] rounding_policy Rounding policy.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- */
- void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * Support: Broadcast? Scale=1/255?
- * - (U8,U8) -> U8, S16 N Y
- * - (U8,S16) -> S16 N Y
- * - (S16,U8) -> S16 N Y
- * - (S16,S16) -> S16 N Y
- * - (S32,S32) -> S32 Y N
- * - (F16,F16) -> F16 N Y
- * - (F32,F32) -> F32 Y Y
- * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y
- * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y
- *
- * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
- * For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
- *
- * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
- * @param[in] rounding_policy Rounding policy.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref NEComplexPixelWiseMultiplicationKernel. */
-class NEComplexPixelWiseMultiplication : public INEOperator
-{
-public:
- /** Initialise the kernel's inputs, output.
- *
- * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- */
- void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplication
- *
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace experimental
-
-/** Basic function to run @ref NEPixelWiseMultiplicationKernel */
+/** Basic function to run @ref cpu::CpuMul */
class NEPixelWiseMultiplication : public IFunction
{
public:
@@ -143,13 +47,31 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEPixelWiseMultiplication(const NEPixelWiseMultiplication &) = delete;
/** Default move constructor */
- NEPixelWiseMultiplication(NEPixelWiseMultiplication &&);
+ NEPixelWiseMultiplication(NEPixelWiseMultiplication &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEPixelWiseMultiplication &operator=(const NEPixelWiseMultiplication &) = delete;
/** Default move assignment operator */
- NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&);
+ NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&) = default;
 /** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ * |S32 |S32 |S32 |
+ *
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
@@ -173,7 +95,12 @@ public:
* @param[in] rounding_policy Rounding policy.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ float scale,
+ ConvertPolicy overflow_policy,
+ RoundingPolicy rounding_policy,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication
*
@@ -200,7 +127,12 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ float scale,
+ ConvertPolicy overflow_policy,
+ RoundingPolicy rounding_policy,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
@@ -211,7 +143,7 @@ private:
std::unique_ptr<Impl> _impl;
};
-/** Basic function to run @ref NEComplexPixelWiseMultiplicationKernel. */
+/** Basic function to run @ref cpu::CpuComplexMul. */
class NEComplexPixelWiseMultiplication : public IFunction
{
public:
@@ -222,11 +154,11 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEComplexPixelWiseMultiplication(const NEComplexPixelWiseMultiplication &) = delete;
/** Default move constructor */
- NEComplexPixelWiseMultiplication(NEComplexPixelWiseMultiplication &&);
+ NEComplexPixelWiseMultiplication(NEComplexPixelWiseMultiplication &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEComplexPixelWiseMultiplication &operator=(const NEComplexPixelWiseMultiplication &) = delete;
/** Default move assignment operator */
- NEComplexPixelWiseMultiplication &operator=(NEComplexPixelWiseMultiplication &&);
+ NEComplexPixelWiseMultiplication &operator=(NEComplexPixelWiseMultiplication &&) = default;
/** Initialise the kernel's inputs, output.
*
* @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
@@ -236,7 +168,10 @@ public:
* @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplication
*
* @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
@@ -244,7 +179,10 @@ public:
* @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -253,5 +191,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H */
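
A minimal usage sketch of the NEPixelWiseMultiplication interface after this change; the shapes, scale and policies below are illustrative assumptions, not values taken from the patch:

    // Element-wise multiply of two F32 tensors with scale 1 (a sketch, not library code).
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, out;
        const TensorInfo info(TensorShape(16U, 16U), 1, DataType::F32);
        a.allocator()->init(info);
        b.allocator()->init(info);
        out.allocator()->init(info);

        NEPixelWiseMultiplication mul;
        // Per the note above, scale values other than 1/255 require RoundingPolicy::TO_ZERO.
        mul.configure(&a, &b, &out, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        mul.run();
        return 0;
    }
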
diff --git a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
index 168845d203..09251f2a5f 100644
--- a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,65 +21,76 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H
-#define ARM_COMPUTE_NEUPSAMPLELAYER_H
+#ifndef ARM_COMPUTE_NEPOOLING3DLAYER_H
+#define ARM_COMPUTE_NEPOOLING3DLAYER_H
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
-class NEUpsampleLayerKernel;
-
-/** Function to run upsample layer */
-class NEUpsampleLayer : public IFunction
+class ITensorInfo;
+class IMemoryManager;
+/** Basic function to simulate a 3D pooling layer with the specified pooling operation. This function calls the following kernels:
+ *
+ * -# @ref cpu::CpuPool3d
+ */
+class NEPooling3dLayer : public IFunction
{
public:
/** Constructor */
- NEUpsampleLayer();
+ NEPooling3dLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEUpsampleLayer(const NEUpsampleLayer &) = delete;
+ NEPooling3dLayer(const NEPooling3dLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEUpsampleLayer &operator=(const NEUpsampleLayer &) = delete;
+ NEPooling3dLayer &operator=(const NEPooling3dLayer &) = delete;
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEUpsampleLayer(NEUpsampleLayer &&) = delete;
+ NEPooling3dLayer(NEPooling3dLayer &&) = delete;
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEUpsampleLayer &operator=(NEUpsampleLayer &&) = delete;
+ NEPooling3dLayer &operator=(NEPooling3dLayer &&) = delete;
/** Default destructor */
- ~NEUpsampleLayer();
- /** Set the input output tensors.
+ ~NEPooling3dLayer();
+ /** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
+ * Valid data layouts:
+ * - NDHWC
*
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ *
+ * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise
+ *
+ * @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor.
+ * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
*/
- void configure(const ITensor *input, ITensor *output, const Size2D &info,
- const InterpolationPolicy &policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayer
+ void configure(const ITensor *input, ITensor *output, const Pooling3dLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPooling3dLayer
+ *
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[out] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
+ * @param[in] input Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info.
+ * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info,
- const InterpolationPolicy &policy);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Pooling3dLayerInfo &pool_info);
// Inherited methods overridden:
void run() override;
private:
- std::unique_ptr<NEUpsampleLayerKernel> _kernel;
- DataLayout _data_layout;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-} // arm_compute
-#endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEPOOLING3DLAYER_H */
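
A sketch of driving the new 3D pooling function. The Pooling3dLayerInfo constructor used here (pool type, pool size, stride) is assumed to mirror the 2D PoolingLayerInfo API; check core/Types.h for the exact signature:

    // 2x2x2 max pooling over an NDHWC F32 tensor; dst info is deduced by configure().
    #include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        TensorInfo src_info(TensorShape(8U /*C*/, 16U /*W*/, 16U /*H*/, 4U /*D*/, 1U /*N*/), 1, DataType::F32);
        src_info.set_data_layout(DataLayout::NDHWC); // the only layout listed as valid above
        src.allocator()->init(src_info);

        NEPooling3dLayer pool;
        pool.configure(&src, &dst, Pooling3dLayerInfo(PoolingType::MAX, 2U, Size3D(2U, 2U, 2U)));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        pool.run();
        return 0;
    }
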
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index b45290fb46..768ad0d818 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,28 +24,28 @@
#ifndef ARM_COMPUTE_NEPOOLINGLAYER_H
#define ARM_COMPUTE_NEPOOLINGLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/core/Types.h"
#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
class ITensorInfo;
-class NEPoolingLayerKernel;
-class NEFillBorderKernel;
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels:
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following kernels:
*
- * -# @ref NEFillBorderKernel (executed if padding size is different from zero)
- * -# @ref NEPoolingLayerKernel
+ * -# @ref cpu::CpuPool2d
*/
class NEPoolingLayer : public IFunction
{
public:
/** Constructor */
- NEPoolingLayer();
+ NEPoolingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEPoolingLayer(const NEPoolingLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -58,7 +58,21 @@ public:
~NEPoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note F16 is supported for pool sizes 2 and 3 only
+ * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise
+ * Cases where the pooling region lies completely outside the input tensor are only supported for floating-point data types
*
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
@@ -77,16 +91,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info,
+ const ITensorInfo *indices = nullptr);
// Inherited methods overridden:
void run() override;
private:
- std::unique_ptr<NEPoolingLayerKernel> _pooling_layer_kernel;
- std::unique_ptr<NEFillBorderKernel> _border_handler;
- bool _is_global_pooling_layer;
- DataLayout _data_layout;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */
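
For comparison, the 2D variant after the same restructuring; shapes and pooling parameters are illustrative:

    // 2x2 max pooling, stride 2, on an NCHW F32 tensor.
    #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));

        NEPoolingLayer pool;
        pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0)));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        pool.run();
        return 0;
    }
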
diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
index 3cc79fa28e..858e3299af 100644
--- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,6 +38,15 @@ class NEPriorBoxLayer : public INESimpleFunctionNoBorder
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------|:--------|:--------|
+ * |F32 |F32 |F32 |
+ *
* @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
* @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
* @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input
@@ -53,7 +62,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const PriorBoxLayerInfo &info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPRIORBOXLAYER_H */
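
A hedged sketch of the F32-only prior box path. The PriorBoxLayerInfo constructor arguments shown (min sizes, variances, offset) are an assumption about the core/Types.h definition and should be verified there:

    // Prior boxes for a 16x16 feature map over a 128x128 image.
    #include "arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor feature_map, image, output;
        feature_map.allocator()->init(TensorInfo(TensorShape(16U, 16U, 256U), 1, DataType::F32));
        image.allocator()->init(TensorInfo(TensorShape(128U, 128U, 3U), 1, DataType::F32));

        const PriorBoxLayerInfo info({32.f}, {0.1f, 0.1f, 0.2f, 0.2f}, 0.5f /*offset*/);

        NEPriorBoxLayer prior;
        prior.configure(&feature_map, &image, &output, info);

        feature_map.allocator()->allocate();
        image.allocator()->allocate();
        output.allocator()->allocate();
        prior.run();
        return 0;
    }
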
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index fcabc1d0c4..009a4e0911 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,17 +25,18 @@
#define ARM_COMPUTE_NEQLSTMLAYER_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "support/MemorySupport.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
#include <memory>
namespace arm_compute
@@ -44,20 +45,25 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
class NEQLSTMLayerNormalizationKernel;
-class NEGEMMLowpMatrixAReductionKernel;
-
+namespace cpu
+{
+namespace kernels
+{
+class CpuGemmLowpMatrixAReductionKernel;
+} // namespace kernels
+} // namespace cpu
/** Basic function to run @ref NEQLSTMLayer
*
- * This function calls the following NEON functions/kernels:
+ * This function calls the following kernels:
*
* -# @ref NEActivationLayer Activation functions (tanh and logistic)
- * -# @ref NEArithmeticAddition Elementwise addition
- * -# @ref NEArithmeticSubtractionKernel Elementwise subtraction
- * -# @ref NECopyKernel Copy kernel for copying output_state_out to output
+ * -# @ref NEArithmeticAddition Elementwise addition
+ * -# @ref NEArithmeticSubtraction Elementwise subtraction
+ * -# @ref NECopy Copy kernel for copying output_state_out to output
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
- * -# @ref NEGEMMLowpMatrixAReductionKernel For precomputing effective biases to use
- * -# @ref NEPixelWiseMultiplication Elementwise multiplication
+ * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16
+ * -# @ref cpu::kernels::CpuGemmLowpMatrixAReductionKernel For precomputing effective biases to use
+ * -# @ref NEPixelWiseMultiplication Elementwise multiplication
* -# @ref NETranspose Transpose function for reshaping the weights
* */
class NEQLSTMLayer : public IFunction
@@ -77,6 +83,14 @@ public:
~NEQLSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 - src6 |src7 - src9 |src10 |src11 |dst0 |dst1 - dst2 |
+ * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------|
+ * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
@@ -116,12 +130,21 @@ public:
* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
*/
- void configure(const ITensor *input,
- const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- const ITensor *cell_state_in, ITensor *output_state_in,
- ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
+ void configure(const ITensor *input,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ const ITensor *cell_state_in,
+ ITensor *output_state_in,
+ ITensor *cell_state_out,
+ ITensor *output_state_out,
+ ITensor *output,
const LSTMParams<ITensor> &lstm_params);
/** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer
@@ -166,12 +189,21 @@ public:
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
* @return a status
*/
- static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
- const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output_state_out,
+ const ITensorInfo *output,
const LSTMParams<ITensorInfo> &lstm_params);
// Inherited methods overridden:
@@ -204,10 +236,17 @@ private:
* @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor.
*
*/
- void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
- const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias, Tensor *mm_res,
- Tensor *outstage_res, float gemmlowp_scale,
- const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
+ void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm,
+ NEGEMMLowpOutputStage &outstage,
+ GEMMLowpOutputStageInfo &gemmlowp_info,
+ const ITensor *mm_input,
+ const ITensor *mm_weights,
+ const ITensor *bias,
+ Tensor *mm_res,
+ Tensor *outstage_res,
+ float gemmlowp_scale,
+ const TensorInfo &mm_res_info,
+ const TensorInfo &outstage_tensor_info);
MemoryGroup _memory_group;
@@ -216,8 +255,8 @@ private:
{
static constexpr uint32_t max_dimension_supported = 2;
- ITensor *_src{ nullptr };
- ITensor *_dst{ nullptr };
+ ITensor *_src{nullptr};
+ ITensor *_dst{nullptr};
size_t _row_size{};
Window _window{};
@@ -243,70 +282,73 @@ private:
};
// Functions used
- NETranspose _transpose_input_to_forget_weights;
- NETranspose _transpose_input_to_cell_weights;
- NETranspose _transpose_input_to_output_weights;
- NETranspose _transpose_input_to_input_weights;
- NETranspose _transpose_recurrent_to_forget_weights;
- NETranspose _transpose_recurrent_to_cell_weights;
- NETranspose _transpose_recurrent_to_output_weights;
- NETranspose _transpose_recurrent_to_input_weights;
- NETranspose _transpose_projection_weights;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction;
- NEArithmeticAddition _projection_bias_add;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
- NEGEMMLowpOutputStage _input_to_forget_outstage;
- NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
- NEGEMMLowpOutputStage _cell_to_forget_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_forget;
- NEArithmeticAddition _accumulate_cell_forget;
- NEActivationLayer _forget_gate_sigmoid;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
- NEGEMMLowpOutputStage _input_to_cell_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
- NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_modulation;
- NEActivationLayer _cell_gate_tanh;
- NEArithmeticSubtraction _input_gate_sub;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
- NEGEMMLowpOutputStage _input_to_input_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
- NEGEMMLowpOutputStage _recurrent_to_input_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_input;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
- NEGEMMLowpOutputStage _cell_to_input_outstage;
- NEArithmeticAddition _accumulate_cell_input;
- NEActivationLayer _input_gate_sigmoid;
- NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
- NEPixelWiseMultiplication _pixelwise_mul_input_cell;
- NEArithmeticAddition _add_forget_cell;
- NEActivationLayer _cell_clip;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
- NEGEMMLowpOutputStage _input_to_output_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
- NEGEMMLowpOutputStage _recurrent_to_output_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_output;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
- NEGEMMLowpOutputStage _cell_to_output_outstage;
- NEArithmeticAddition _accumulate_cell_to_output;
- NEActivationLayer _output_gate_sigmoid;
- NEActivationLayer _hidden_tanh;
- NEPixelWiseMultiplication _pixelwise_mul_hidden;
- NEGEMMLowpOutputStage _hidden_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_projection;
- NEGEMMLowpOutputStage _projection_outstage;
- NEArithmeticAddition _accumulate_projection;
- NEActivationLayer _projection_clip;
+
+ NEDequantizationLayer _dequantize_input_to_forget_weights;
+ NEQuantizationLayer _quantize_input_to_forget_weights;
+ NETranspose _transpose_input_to_forget_weights;
+ NETranspose _transpose_input_to_cell_weights;
+ NETranspose _transpose_input_to_output_weights;
+ NETranspose _transpose_input_to_input_weights;
+ NETranspose _transpose_recurrent_to_forget_weights;
+ NETranspose _transpose_recurrent_to_cell_weights;
+ NETranspose _transpose_recurrent_to_output_weights;
+ NETranspose _transpose_recurrent_to_input_weights;
+ NETranspose _transpose_projection_weights;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _projection_reduction;
+ NEArithmeticAddition _projection_bias_add;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
+ NEGEMMLowpOutputStage _input_to_forget_outstage;
+ NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
+ NEGEMMLowpOutputStage _cell_to_forget_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_forget;
+ NEArithmeticAddition _accumulate_cell_forget;
+ NEActivationLayer _forget_gate_sigmoid;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
+ NEGEMMLowpOutputStage _input_to_cell_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
+ NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_modulation;
+ NEActivationLayer _cell_gate_tanh;
+ NEArithmeticSubtraction _input_gate_sub;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
+ NEGEMMLowpOutputStage _input_to_input_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
+ NEGEMMLowpOutputStage _recurrent_to_input_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_input;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
+ NEGEMMLowpOutputStage _cell_to_input_outstage;
+ NEArithmeticAddition _accumulate_cell_input;
+ NEActivationLayer _input_gate_sigmoid;
+ NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
+ NEPixelWiseMultiplication _pixelwise_mul_input_cell;
+ NEArithmeticAddition _add_forget_cell;
+ NEActivationLayer _cell_clip;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
+ NEGEMMLowpOutputStage _input_to_output_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
+ NEGEMMLowpOutputStage _recurrent_to_output_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_output;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
+ NEGEMMLowpOutputStage _cell_to_output_outstage;
+ NEArithmeticAddition _accumulate_cell_to_output;
+ NEActivationLayer _output_gate_sigmoid;
+ NEActivationLayer _hidden_tanh;
+ NEPixelWiseMultiplication _pixelwise_mul_hidden;
+ NEGEMMLowpOutputStage _hidden_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_projection;
+ NEGEMMLowpOutputStage _projection_outstage;
+ NEArithmeticAddition _accumulate_projection;
+ NEActivationLayer _projection_clip;
TensorCopyKernel _projection_bias_copy;
TensorCopyKernel _projection_output_to_accumulate_copy;
@@ -318,19 +360,16 @@ private:
NECopy _copy_output;
// Tensor pointers
- const ITensor *_input_to_input_weights
- {
- nullptr
- };
- const ITensor *_recurrent_to_input_weights{ nullptr };
- const ITensor *_projection_bias{ nullptr };
- const ITensor *_input_to_forget_weights{ nullptr };
- const ITensor *_input_to_cell_weights{ nullptr };
- const ITensor *_input_to_output_weights{ nullptr };
- const ITensor *_recurrent_to_forget_weights{ nullptr };
- const ITensor *_recurrent_to_cell_weights{ nullptr };
- const ITensor *_recurrent_to_output_weights{ nullptr };
- const ITensor *_projection_weights{ nullptr };
+ const ITensor *_input_to_input_weights{nullptr};
+ const ITensor *_recurrent_to_input_weights{nullptr};
+ const ITensor *_projection_bias{nullptr};
+ const ITensor *_input_to_forget_weights{nullptr};
+ const ITensor *_input_to_cell_weights{nullptr};
+ const ITensor *_input_to_output_weights{nullptr};
+ const ITensor *_recurrent_to_forget_weights{nullptr};
+ const ITensor *_recurrent_to_cell_weights{nullptr};
+ const ITensor *_recurrent_to_output_weights{nullptr};
+ const ITensor *_projection_weights{nullptr};
std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{};
std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{};
@@ -365,63 +404,66 @@ private:
return _layer_norms[getGateIndex(g)];
}
- void configure_layer_norm(LayerNormGate g, const ITensor *in);
+ void configure_layer_norm(LayerNormGate g, const ITensor *in);
static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
- Tensor _input_to_forget_weights_transposed{ nullptr };
- Tensor _input_to_cell_weights_transposed{ nullptr };
- Tensor _input_to_output_weights_transposed{ nullptr };
- Tensor _input_to_input_weights_transposed{ nullptr };
- Tensor _recurrent_to_forget_weights_transposed{ nullptr };
- Tensor _recurrent_to_cell_weights_transposed{ nullptr };
- Tensor _recurrent_to_output_weights_transposed{ nullptr };
- Tensor _recurrent_to_input_weights_transposed{ nullptr };
- Tensor _projection_weights_transposed{ nullptr };
- Tensor _input_to_input_eff_bias{ nullptr };
- Tensor _recurrent_to_input_eff_bias{ nullptr };
- Tensor _input_to_forget_eff_bias{ nullptr };
- Tensor _recurrent_to_forget_eff_bias{ nullptr };
- Tensor _input_to_cell_eff_bias{ nullptr };
- Tensor _recurrent_to_cell_eff_bias{ nullptr };
- Tensor _input_to_output_eff_bias{ nullptr };
- Tensor _recurrent_to_output_eff_bias{ nullptr };
- Tensor _projection_reduction_res{ nullptr };
- Tensor _projection_eff_bias{ nullptr };
- Tensor _mm_input_to_forget_res{ nullptr };
- Tensor _mm_recurrent_to_forget_res{ nullptr };
- Tensor _mul_cell_to_forget_res{ nullptr };
- Tensor _input_to_forget_outstage_res{ nullptr };
- Tensor _cell_to_forget_outstage_res{ nullptr };
- Tensor _recurrent_to_forget_outstage_res{ nullptr };
- Tensor _forget_gate{ nullptr };
- Tensor _mm_input_to_cell_res{ nullptr };
- Tensor _input_to_cell_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_cell_res{ nullptr };
- Tensor _recurrent_to_cell_outstage_res{ nullptr };
- Tensor _cell_gate{ nullptr };
- Tensor _mul_input_cell_res{ nullptr };
- Tensor _mm_input_to_input_res{ nullptr };
- Tensor _input_to_input_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_input_res{ nullptr };
- Tensor _mul_cell_to_input_res{ nullptr };
- Tensor _cell_to_input_outstage_res{ nullptr };
- Tensor _recurrent_to_input_outstage_res{ nullptr };
- Tensor _input_gate{ nullptr };
- Tensor _mm_input_to_output_res{ nullptr };
- Tensor _input_to_output_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_output_res{ nullptr };
- Tensor _mul_cell_to_output_res{ nullptr };
- Tensor _cell_to_output_outstage_res{ nullptr };
- Tensor _recurrent_to_output_outstage_res{ nullptr };
- Tensor _output_gate{ nullptr };
- Tensor _hidden_mul_res{ nullptr };
- Tensor _hidden_gate{ nullptr };
- Tensor _mm_projection_res{ nullptr };
- Tensor _projection_outstage_res{ nullptr };
- Tensor _projection_out_res{ nullptr };
- Tensor _projection_accumulate_res{ nullptr };
- Tensor _ones{ nullptr };
+ Tensor _input_to_forget_weights_f32{nullptr};
+ Tensor _input_to_forget_weights_symm8{nullptr};
+
+ Tensor _input_to_forget_weights_transposed{nullptr};
+ Tensor _input_to_cell_weights_transposed{nullptr};
+ Tensor _input_to_output_weights_transposed{nullptr};
+ Tensor _input_to_input_weights_transposed{nullptr};
+ Tensor _recurrent_to_forget_weights_transposed{nullptr};
+ Tensor _recurrent_to_cell_weights_transposed{nullptr};
+ Tensor _recurrent_to_output_weights_transposed{nullptr};
+ Tensor _recurrent_to_input_weights_transposed{nullptr};
+ Tensor _projection_weights_transposed{nullptr};
+ Tensor _input_to_input_eff_bias{nullptr};
+ Tensor _recurrent_to_input_eff_bias{nullptr};
+ Tensor _input_to_forget_eff_bias{nullptr};
+ Tensor _recurrent_to_forget_eff_bias{nullptr};
+ Tensor _input_to_cell_eff_bias{nullptr};
+ Tensor _recurrent_to_cell_eff_bias{nullptr};
+ Tensor _input_to_output_eff_bias{nullptr};
+ Tensor _recurrent_to_output_eff_bias{nullptr};
+ Tensor _projection_reduction_res{nullptr};
+ Tensor _projection_eff_bias{nullptr};
+ Tensor _mm_input_to_forget_res{nullptr};
+ Tensor _mm_recurrent_to_forget_res{nullptr};
+ Tensor _mul_cell_to_forget_res{nullptr};
+ Tensor _input_to_forget_outstage_res{nullptr};
+ Tensor _cell_to_forget_outstage_res{nullptr};
+ Tensor _recurrent_to_forget_outstage_res{nullptr};
+ Tensor _forget_gate{nullptr};
+ Tensor _mm_input_to_cell_res{nullptr};
+ Tensor _input_to_cell_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_cell_res{nullptr};
+ Tensor _recurrent_to_cell_outstage_res{nullptr};
+ Tensor _cell_gate{nullptr};
+ Tensor _mul_input_cell_res{nullptr};
+ Tensor _mm_input_to_input_res{nullptr};
+ Tensor _input_to_input_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_input_res{nullptr};
+ Tensor _mul_cell_to_input_res{nullptr};
+ Tensor _cell_to_input_outstage_res{nullptr};
+ Tensor _recurrent_to_input_outstage_res{nullptr};
+ Tensor _input_gate{nullptr};
+ Tensor _mm_input_to_output_res{nullptr};
+ Tensor _input_to_output_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_output_res{nullptr};
+ Tensor _mul_cell_to_output_res{nullptr};
+ Tensor _cell_to_output_outstage_res{nullptr};
+ Tensor _recurrent_to_output_outstage_res{nullptr};
+ Tensor _output_gate{nullptr};
+ Tensor _hidden_mul_res{nullptr};
+ Tensor _hidden_gate{nullptr};
+ Tensor _mm_projection_res{nullptr};
+ Tensor _projection_outstage_res{nullptr};
+ Tensor _projection_out_res{nullptr};
+ Tensor _projection_accumulate_res{nullptr};
+ Tensor _ones{nullptr};
std::array<Tensor, _layer_norm_count> _layer_norm_output{};
inline Tensor &get_layer_norm_output(LayerNormGate g)
@@ -429,14 +471,15 @@ private:
return _layer_norm_output[getGateIndex(g)];
}
- bool _is_prepared{ false };
- bool _has_cifg{ false };
- bool _has_cell_clipping{ false };
- bool _has_projection{ false };
- bool _has_projection_clipping{ false };
- bool _has_peephole{ false };
- bool _has_layer_norm{ false };
- bool _projection_tensor_copy_required{ false };
+ bool _is_prepared{false};
+ bool _has_cifg{false};
+ bool _has_cell_clipping{false};
+ bool _has_projection{false};
+ bool _has_projection_clipping{false};
+ bool _has_peephole{false};
+ bool _has_layer_norm{false};
+ bool _projection_tensor_copy_required{false};
+ bool _convert_input_to_forget_weights_to_qsymm8{false};
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEQLSTMLAYER_H */
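
Given the breadth of this interface, a call-shape sketch of validate() may help. Shapes follow the parameter documentation above (input_size = 32, num_units = 16, batch_size = 2, output_size = 16); the quantization scales are placeholders, so the returned Status is expected to describe what a real configuration must still provide rather than certify this one:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEQLSTMLayer.h"
    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        const TensorInfo input(TensorShape(32U, 2U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f / 128, 0));
        const TensorInfo in_w(TensorShape(32U, 16U), 1, DataType::QSYMM8, QuantizationInfo(1.f / 64, 0));
        const TensorInfo rec_w(TensorShape(16U, 16U), 1, DataType::QSYMM8, QuantizationInfo(1.f / 64, 0));
        const TensorInfo bias(TensorShape(16U), 1, DataType::S32);
        const TensorInfo cell(TensorShape(16U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 2048, 0));
        const TensorInfo state(TensorShape(16U, 2U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f / 128, 0));

        LSTMParams<ITensorInfo> params; // defaults: no CIFG/peephole/projection/layer-norm tensors set

        const Status st = NEQLSTMLayer::validate(&input, &in_w, &in_w, &in_w,
                                                 &rec_w, &rec_w, &rec_w,
                                                 &bias, &bias, &bias,
                                                 &cell, &state, &cell, &state, &state,
                                                 params);
        std::cout << st.error_description() << std::endl;
        return 0;
    }
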
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 36302f4741..7bf97e28a5 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,28 +24,45 @@
#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYER_H
#define ARM_COMPUTE_NEQUANTIZATIONLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IRuntimeContext.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-/** Basic function to simulate a quantization layer. This function calls the following NEON kernels:
- *
- *
- * -# @ref NEQuantizationLayerKernel
- *
- */
-class NEQuantizationLayer : public INESimpleFunctionNoBorder
+/** Basic function to run a quantization layer using @ref cpu::CpuQuantize */
+class NEQuantizationLayer : public IFunction
{
public:
+ NEQuantizationLayer();
+ /** Default Destructor */
+ ~NEQuantizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationLayer(const NEQuantizationLayer &) = delete;
+ /** Default move constructor */
+ NEQuantizationLayer(NEQuantizationLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationLayer &operator=(const NEQuantizationLayer &) = delete;
+ /** Default move assignment operator */
+ NEQuantizationLayer &operator=(NEQuantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------------------------------|
+ * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ *
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
*/
@@ -58,6 +75,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEQUANTIZATIONLAYER_H */
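
A short sketch of one quantization path from the table above (F32 source to QASYMM8 destination); the scale and offset are assumptions:

    #include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
        // The destination info carries the target quantization (scale 0.25, offset 10 here).
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 10)));

        NEQuantizationLayer quant;
        quant.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        quant.run();
        return 0;
    }
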
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index c42b303a89..af7f464ac9 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
@@ -34,7 +35,6 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
-class NECopyKernel;
/** Basic function to run @ref NERNNLayer */
class NERNNLayer : public IFunction
@@ -54,6 +54,16 @@ public:
~NERNNLayer();
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |src3 |dst0 |dst1 |
+ * |:------|:------|:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |F32 |F32 |
+ *
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
* @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input
* @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input
@@ -62,7 +72,13 @@ public:
* @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input
* @param[in] info Activation layer parameter.
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights, const ITensor *bias, ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *recurrent_weights,
+ const ITensor *bias,
+ ITensor *hidden_state,
+ ITensor *output,
+ ActivationLayerInfo &info);
/** Initialize the function
*
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
@@ -75,7 +91,12 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state, const ITensorInfo *output,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *recurrent_weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *hidden_state,
+ const ITensorInfo *output,
const ActivationLayerInfo &info);
// Inherited methods overridden:
@@ -83,16 +104,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_state_f;
- NEArithmeticAddition _add_f;
- NEActivationLayer _activation;
- NEFullyConnectedLayer _fully_connected;
- std::unique_ptr<NECopyKernel> _copy_kernel;
- Tensor _fully_connected_out;
- Tensor _gemm_output;
- Tensor _add_output;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ NEGEMM _gemm_state_f;
+ NEArithmeticAddition _add_f;
+ NEActivationLayer _activation;
+ NEFullyConnectedLayer _fully_connected;
+ NECopy _copy_f;
+ Tensor _fully_connected_out;
+ Tensor _gemm_output;
+ Tensor _add_output;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NERNNLAYER_H */
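
A sketch of a single RNN step with the documented shapes (input_size = 8, num_units = 4, batch_size = 1, all illustrative). Note that configure() takes the ActivationLayerInfo by non-const reference, so an lvalue is required:

    #include "arm_compute/runtime/NEON/functions/NERNNLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor input, weights, rec_weights, bias, hidden, output;
        input.allocator()->init(TensorInfo(TensorShape(8U, 1U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        rec_weights.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        bias.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));
        hidden.allocator()->init(TensorInfo(TensorShape(4U, 1U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(4U, 1U), 1, DataType::F32));

        ActivationLayerInfo act(ActivationLayerInfo::ActivationFunction::TANH);
        NERNNLayer rnn;
        rnn.configure(&input, &weights, &rec_weights, &bias, &hidden, &output, act);

        input.allocator()->allocate();
        weights.allocator()->allocate();
        rec_weights.allocator()->allocate();
        bias.allocator()->allocate();
        hidden.allocator()->allocate();
        output.allocator()->allocate();
        rnn.run();
        return 0;
    }
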
diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
index ea3be18932..b06ebe899d 100644
--- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,21 +32,27 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
-/** Basic function to run @ref NEROIAlignLayerKernel.
- *
- * This function calls the following NEON kernels:
- * -# @ref NEROIAlignLayerKernel
- *
- */
+/** Basic function to run @ref NEROIAlignLayerKernel. */
class NEROIAlignLayer : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM16 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input
+ * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
*
@@ -59,7 +65,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8,
+ * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED,
* otherwise same as @p input
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
@@ -71,7 +77,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEROIALIGNLAYER_H */
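
A sketch of the F32 configuration from the table (rois share the input type in the float case); the single ROI written below and the output auto-initialisation during configure() are illustrative assumptions:

    #include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <algorithm>

    using namespace arm_compute;

    int main()
    {
        Tensor input, rois, output;
        input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        rois.allocator()->init(TensorInfo(TensorShape(5U, 1U), 1, DataType::F32)); // one [batch_id, x1, y1, x2, y2] row

        NEROIAlignLayer align;
        align.configure(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 0.03125f /*spatial_scale*/));

        input.allocator()->allocate();
        rois.allocator()->allocate();
        output.allocator()->allocate();

        const float roi[5] = {0.f, 0.f, 0.f, 16.f, 16.f}; // placeholder box; real ones come from a detector
        std::copy(roi, roi + 5, reinterpret_cast<float *>(rois.buffer()));

        align.run();
        return 0;
    }
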
diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
index 0b9b4f75fc..929111ad4b 100644
--- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,23 +24,19 @@
#ifndef ARM_COMPUTE_NEROIPOOLINGLAYER_H
#define ARM_COMPUTE_NEROIPOOLINGLAYER_H
+#include "arm_compute/core/IArray.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/IArray.h"
#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
class NEROIPoolingLayerKernel;
class ROIPoolingLayerInfo;
-/** Basic function to run @ref NEROIPoolingLayerKernel.
- *
- * This function calls the following NEON kernels:
- * -# @ref NEROIPoolingLayerKernel
- *
- */
+/** Basic function to run @ref NEROIPoolingLayerKernel. */
class NEROIPoolingLayer : public IFunction
{
public:
@@ -58,7 +54,16 @@ public:
~NEROIPoolingLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32.
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F32 |U16 |F32 |
+ * |QASYMM8 |U16 |QASYMM8 |
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F32
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
* @param[out] output Destination tensor. Data types supported: Same as @p input.
@@ -69,11 +74,30 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
+ void
+ configure(const ITensor *input, const ITensor *rois, const ITensor *output, const ROIPoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run() override;
+ /** Static function to check if given info will lead to a valid configuration of @ref NEROIPoolingLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F32.
+ * @param[in] rois TensorInfo for rois tensor which is a 2D tensor of size [5,N] (where 5 is the number ROIs). Data types supported: U16
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ const ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
+
private:
std::unique_ptr<NEROIPoolingLayerKernel> _roi_kernel;
};
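
The same pattern for ROI pooling, using the U16 roi format documented above; coordinates and shapes are illustrative:

    #include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <algorithm>
    #include <cstdint>

    using namespace arm_compute;

    int main()
    {
        Tensor input, rois, output;
        input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        rois.allocator()->init(TensorInfo(TensorShape(5U, 1U), 1, DataType::U16));
        // Output dims follow the notes above: [pooled_w, pooled_h, C, num_rois].
        output.allocator()->init(TensorInfo(TensorShape(7U, 7U, 16U, 1U), 1, DataType::F32));

        NEROIPoolingLayer roi_pool;
        roi_pool.configure(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 0.03125f));

        input.allocator()->allocate();
        rois.allocator()->allocate();
        output.allocator()->allocate();

        const uint16_t roi[5] = {0, 0, 0, 16, 16}; // placeholder box
        std::copy(roi, roi + 5, reinterpret_cast<uint16_t *>(rois.buffer()));

        roi_pool.run();
        return 0;
    }
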
diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h
index 28976001d7..609456a4ef 100644
--- a/arm_compute/runtime/NEON/functions/NERange.h
+++ b/arm_compute/runtime/NEON/functions/NERange.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
@@ -57,6 +58,21 @@ public:
~NERange();
/** Initialize the kernel's start, end, step and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |dst |
+ * |:---------|
+ * |U8 |
+ * |S8 |
+ * |U16 |
+ * |S16 |
+ * |U32 |
+ * |S32 |
+ * |F16 |
+ * |F32 |
+ *
* @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] start The starting value of the sequence.
 * @param[in] end The ending (exclusive) value of the sequence.
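
A minimal usage sketch for NERange as documented above; the start/end/step values and the output length are illustrative assumptions:

    #include "arm_compute/runtime/NEON/functions/NERange.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void range_sketch()
    {
        Tensor out;
        // 10 elements: 0.0, 0.5, ..., 4.5 (the end value 5.0 is excluded).
        out.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32));

        NERange range;
        range.configure(&out, 0.f /* start */, 5.f /* end */, 0.5f /* step */);
        out.allocator()->allocate();
        range.run();
    }
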
diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h
index 89cd09812b..5b8d8cdf2b 100644
--- a/arm_compute/runtime/NEON/functions/NEReduceMean.h
+++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,12 +24,9 @@
#ifndef ARM_COMPUTE_NEON_REDUCE_MEAN_H
#define ARM_COMPUTE_NEON_REDUCE_MEAN_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
@@ -54,6 +51,17 @@ public:
~NEReduceMean();
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32
@@ -72,7 +80,8 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output);
+ static Status
+ validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output);
// Inherited methods overridden:
void run() override;
@@ -82,13 +91,8 @@ private:
std::vector<NEReductionOperation> _reduction_kernels;
std::vector<Tensor> _reduced_outs;
NEReshapeLayer _reshape;
- NEDequantizationLayer _dequant;
- NEQuantizationLayer _requant;
int _reduction_ops;
bool _keep_dims;
- bool _do_requant;
- Tensor _input_no_quant;
- Tensor _output_no_quant;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEON_REDUCE_MEAN_H */
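
With the internal quantize/dequantize round-trip removed, quantized inputs now go through the reduction directly and the public interface is unchanged. A minimal sketch, with an illustrative 8x8x16 input averaged over both spatial axes:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reduce_mean_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));
        // keep_dims = true: the reduced axes stay as size-1 dimensions.
        dst.allocator()->init(TensorInfo(TensorShape(1U, 1U, 16U), 1, DataType::F32));

        const Coordinates axes(0, 1); // reduce over the two spatial dimensions

        NEReduceMean reduce_mean;
        reduce_mean.configure(&src, axes, true /* keep_dims */, &dst);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        reduce_mean.run();
    }
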
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index 8186e2e355..f5391a6d0e 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,9 @@
#define ARM_COMPUTE_NEREDUCTIONOPERATION_H
#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
+
#include <memory>
namespace arm_compute
@@ -35,7 +35,7 @@ namespace arm_compute
class ITensor;
class NEReductionOperationKernel;
-/** Basic function to simulate a reduction operation. This function calls the following NEON kernels:
+/** Basic function to perform a reduction operation. This function calls the following kernels:
*
* -# @ref NEReshapeLayer
* -# @ref NEReductionOperationKernel
@@ -58,7 +58,19 @@ public:
~NEReductionOperation();
/** Set the input and output tensors.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |S32 |S32 |
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. (Written to only for border_size != 0)
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
 * @param[in] axis Dimension along which to reduce. Supported reduction axis: 0
* @param[in] op Reduction operation to perform.
@@ -68,7 +80,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation.
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32.
* @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
 * @param[in] axis Dimension along which to reduce. Supported reduction axis: 0
* @param[in] op Reduction operation to perform.
@@ -76,7 +88,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ unsigned int axis,
+ ReductionOperation op,
+ bool keep_dims = true);
// Inherited methods overridden:
void run() override;
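
A minimal sketch of the axis-0 reduction documented above; the 32x4 shape and the SUM operation are illustrative assumptions:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reduction_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::F32)); // axis 0 reduced to 1

        NEReductionOperation reduction;
        reduction.configure(&src, &dst, 0 /* axis */, ReductionOperation::SUM, true /* keep_dims */);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        reduction.run();
    }
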
diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h
deleted file mode 100644
index 86f366a697..0000000000
--- a/arm_compute/runtime/NEON/functions/NERemap.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREMAP_H
-#define ARM_COMPUTE_NEREMAP_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute remap. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NERemapKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NERemap : public INESimpleFunction
-{
-public:
- /** Initialise the function's sources, destination, interpolation policy and border mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] map_x Map for X coordinates. Data type supported: F32.
- * @param[in] map_y Map for Y coordinates. Data type supported: F32.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
- * @param[in] border_mode Border mode to use on the input tensor.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output,
- InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEREMAP_H */
diff --git a/arm_compute/runtime/NEON/functions/NEReorderLayer.h b/arm_compute/runtime/NEON/functions/NEReorderLayer.h
new file mode 100644
index 0000000000..e3fa7b9c16
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEReorderLayer.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(__aarch64__)
+
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+class NEReorderKernel;
+/** Function to compute blocked reorder. */
+class NEReorderLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ NEReorderLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorderLayer(const NEReorderLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorderLayer &operator=(const NEReorderLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReorderLayer(NEReorderLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReorderLayer &operator=(NEReorderLayer &&) = delete;
+ /** Default destructor */
+ ~NEReorderLayer();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ *
+ * @param[in] input Source tensor. Data type supported: F32. Data layouts supported: NCHW.
+ * @param[out] output Destination tensor with the same dimensions, data type and data layout as @p input,
+ * except that the last dimension of the data layout must be a multiple of the blocking parameter ksize
+ * @param[in] input_wf WeightFormat of input.
+ * @param[in] output_wf WeightFormat of output.
+ */
+ void configure(const ITensor *input,
+ ITensor *output,
+ arm_compute::WeightFormat input_wf,
+ arm_compute::WeightFormat output_wf);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReorderLayer
+ *
+ * Similar to @ref NEReorderLayer::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ arm_compute::WeightFormat input_wf,
+ arm_compute::WeightFormat output_wf);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::unique_ptr<NEReorderKernel> _reorder_kernel; /**< Reorder layer kernel */
+};
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER */
+
+#endif // defined(__aarch64__)
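
A sketch of how the new function might be driven; the weight formats (OHWI to OHWIo4) and the 2D shape are assumptions for illustration, and validate() is the authoritative check that the last data-layout dimension respects the block size implied by the output format:

    #if defined(__aarch64__)
    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEReorderLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reorder_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32));

        NEReorderLayer reorder;
        // Assumed formats: plain OHWI in, output blocked by 4.
        ARM_COMPUTE_ERROR_THROW_ON(NEReorderLayer::validate(
            src.info(), dst.info(), WeightFormat::OHWI, WeightFormat::OHWIo4));
        reorder.configure(&src, &dst, WeightFormat::OHWI, WeightFormat::OHWIo4);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        reorder.run();
    }
    #endif // __aarch64__
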
diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
index f76d1d252c..0a7d824d10 100644
--- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,15 @@ class NEReorgLayer : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First tensor input. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] stride Stride to be used during data re-organization
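
A minimal sketch of the re-organization: with stride 2, a 4x4x1 input has its spatial blocks moved into channels, giving 2x2x4 (shapes are illustrative):

    #include "arm_compute/runtime/NEON/functions/NEReorgLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reorg_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 4U), 1, DataType::F32));

        NEReorgLayer reorg;
        reorg.configure(&src, &dst, 2 /* stride */);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        reorg.run();
    }
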
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index 641a96e0f9..3e6e33f797 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Basic function to run @ref NEReshapeLayerKernel */
+/** Basic function to run @ref cpu::kernels::CpuReshapeKernel */
class NEReshapeLayer : public IFunction
{
public:
@@ -52,6 +52,14 @@ public:
NEReshapeLayer &operator=(NEReshapeLayer &&);
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
@@ -73,41 +81,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace experimental
-{
-/** Basic function to run @ref NEReshapeLayerKernel */
-class NEReshape : public INEOperator
-{
-public:
- /** Default Constructor */
- NEReshape() = default;
- /** Default Destructor */
- ~NEReshape();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReshape(const NEReshape &) = delete;
- /** Default move constructor */
- NEReshape(NEReshapeLayer &&);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReshape &operator=(const NEReshape &) = delete;
- /** Default move assignment operator */
- NEReshape &operator=(NEReshape &&);
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] input Input tensor info. Data type supported: All
- * @param[out] output Output info. Data type supported: Same as @p input
- */
- void configure(const ITensorInfo *input, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer
- *
- * @param[in] input Input tensor info. Data type supported: All
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace experimental
} // namespace arm_compute
#endif /*ARM_COMPUTE_NERESHAPELAYER_H */
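
Usage is unchanged by the kernel migration; a minimal sketch flattening a 4x4 tensor to 16 elements (the element count must be preserved):

    #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reshape_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

        NEReshapeLayer reshape;
        reshape.configure(&src, &dst);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        reshape.run();
    }
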
diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h
index 2048dafcb5..e03e415068 100644
--- a/arm_compute/runtime/NEON/functions/NEReverse.h
+++ b/arm_compute/runtime/NEON/functions/NEReverse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,12 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEREVERSE_H
-#define ARM_COMPUTE_NEREVERSE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -39,20 +38,39 @@ class NEReverse : public INESimpleFunctionNoBorder
public:
/** Initialize the function
*
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |All |U32, S32 |All |
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
+ * @param[in] use_inverted_axis Reverse the ACL axis indices convention; if true, inverted axis = (tensor_rank - 1) - axis
+ *
+ * @note The value of each axis must be within [-rank, rank)
+ * @note If the axis tensor contains duplicate values, subsequent occurrences are ignored, e.g. an axis array of [2, 2] has the same effect as [2].
+ *
+ * @deprecated Support for U32 in axis tensor will be removed in 24.02 release
+ *
*/
- void configure(const ITensor *input, ITensor *output, const ITensor *axis);
+ void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel
*
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
+ * @param[in] use_inverted_axis Reverse the ACL axis indices convention; if true, inverted axis = (tensor_rank - 1) - axis
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *axis,
+ const bool use_inverted_axis = false);
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEREVERSE_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
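
A minimal sketch of the updated interface; the axis tensor uses S32 values since U32 support is deprecated, and the 8x8x3 shape is an illustrative assumption:

    #include "arm_compute/runtime/NEON/functions/NEReverse.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <cstdint>

    using namespace arm_compute;

    void reverse_sketch()
    {
        Tensor src, dst, axis;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
        axis.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::S32));

        NEReverse reverse;
        reverse.configure(&src, &dst, &axis, false /* use_inverted_axis */);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        axis.allocator()->allocate();
        *reinterpret_cast<int32_t *>(axis.buffer()) = 0; // reverse along dimension 0
        reverse.run();
    }
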
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index fceda83510..72dfa3bda4 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,32 +26,58 @@
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to run @ref NEScaleKernel */
-class NEScale : public INESimpleFunctionNoBorder
+/** Basic function to compute Scale */
+class NEScale : public IFunction
{
public:
- /** Constructor
- *
- * Initialize NEScale
- */
+ /** Constructor */
NEScale();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScale(const NEScale &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEScale(NEScale &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScale &operator=(const NEScale &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEScale &operator=(NEScale &&) = delete;
+ /** Destructor */
+ ~NEScale();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |U8 |U8 |
+ * |S8 |S8 |
+ * |S16 |S16 |
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for configuration
+ *
+ * @note Using S8 data type only supports NHWC, @p border_mode Replicate, and @p policy Bilinear
*/
void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEScale
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for validation
*
@@ -59,10 +85,12 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info);
+ // Inherited methods overridden:
+ void run() override;
+
private:
- Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */
- Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */
- Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NESCALEIMAGE_H */
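
Client code is unaffected by the move to an Impl-based function; a minimal sketch of a 2x bilinear upscale (the shapes, interpolation policy and border mode are illustrative assumptions):

    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/runtime/NEON/functions/NEScale.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void scale_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 3U), 1, DataType::F32));

        NEScale scale;
        scale.configure(&src, &dst,
                        ScaleKernelInfo{InterpolationPolicy::BILINEAR, BorderMode::REPLICATE});
        src.allocator()->allocate();
        dst.allocator()->allocate();
        scale.run();
    }
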
diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h
deleted file mode 100644
index 8dd8a80287..0000000000
--- a/arm_compute/runtime/NEON/functions/NEScharr3x3.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESCHARR3x3_H
-#define ARM_COMPUTE_NESCHARR3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute scharr 3x3 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEScharr3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEScharr3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16.
- * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NESCHARR3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h
index c66fbfa7d4..c8e5a204dd 100644
--- a/arm_compute/runtime/NEON/functions/NESelect.h
+++ b/arm_compute/runtime/NEON/functions/NESelect.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class NESelect : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |U8 |All |All |All |
+ *
* @param[in] c Condition input tensor. Data types supported: U8.
* @param[in] x First input tensor. Data types supported: All.
* @param[in] y Second input tensor. Data types supported: Same as @p x
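
A minimal sketch of the element-wise select (out[i] = c[i] ? x[i] : y[i]); the 1D shape is an illustrative assumption:

    #include "arm_compute/runtime/NEON/functions/NESelect.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void select_sketch()
    {
        Tensor c, x, y, out;
        c.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::U8)); // condition mask
        x.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        y.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

        NESelect select;
        select.configure(&c, &x, &y, &out);
        // Allocate all four tensors and fill c/x/y, then: select.run();
    }
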
diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h
index 28628778cb..70a688d3b0 100644
--- a/arm_compute/runtime/NEON/functions/NESlice.h
+++ b/arm_compute/runtime/NEON/functions/NESlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,25 +32,44 @@ namespace arm_compute
// Forward Declarations
class ITensor;
-namespace experimental
-{
/** Basic function to perform tensor slicing */
-class NESlice : public INEOperator
+class NESlice : public IFunction
{
public:
+ /** Default Constructor */
+ NESlice();
+ /** Default Destructor */
+ ~NESlice();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESlice(const NESlice &) = delete;
+ /** Default move constructor */
+ NESlice(NESlice &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESlice &operator=(const NESlice &) = delete;
+ /** Default move assignment operator */
+ NESlice &operator=(NESlice &&);
+
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
* @note Start indices must be non-negative. 0 <= starts[i]
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
* @note End indices are not inclusive unless negative.
*
- * @param[in] input Source tensor info. Data type supported: All
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] input Source tensor. Data type supported: All
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
*/
- void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+ void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends);
/** Static function to check if given info will lead to a valid configuration of @ref NESlice
*
@@ -66,27 +85,23 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-} // namespace experimental
+namespace experimental
+{
/** Basic function to perform tensor slicing */
-class NESlice : public IFunction
+class NESlice : public INEOperator
{
public:
- /** Default Constructor */
- NESlice();
- /** Default Destructor */
- ~NESlice();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESlice(const NESlice &) = delete;
- /** Default move constructor */
- NESlice(NESlice &&);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESlice &operator=(const NESlice &) = delete;
- /** Default move assignment operator */
- NESlice &operator=(NESlice &&);
-
/** Configure kernel
*
* @note Supported tensor rank: up to 4
@@ -94,12 +109,12 @@ public:
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
* @note End indices are not inclusive unless negative.
*
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
*/
- void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends);
+ void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
/** Static function to check if given info will lead to a valid configuration of @ref NESlice
*
@@ -115,14 +130,9 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- struct Impl;
- std::unique_ptr<Impl> _impl;
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NE_SLICE_H */
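
With the run-time class now the primary NESlice (the operator version moved under experimental), typical usage looks like this sketch; the 8x8 shape and the [2, 6) window are illustrative assumptions:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NESlice.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void slice_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32)); // [2, 6) in both dims

        NESlice slice;
        slice.configure(&src, &dst, Coordinates(2, 2) /* starts */, Coordinates(6, 6) /* ends */);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        slice.run();
    }
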
diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h
deleted file mode 100644
index 89a2e07570..0000000000
--- a/arm_compute/runtime/NEON/functions/NESobel3x3.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL3x3_H
-#define ARM_COMPUTE_NESOBEL3x3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute sobel 3x3 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NESobel3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NESobel3x3 : public INESimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NESOBEL3x3_H */
diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h
deleted file mode 100644
index 79e653b395..0000000000
--- a/arm_compute/runtime/NEON/functions/NESobel5x5.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL5x5_H
-#define ARM_COMPUTE_NESOBEL5x5_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NESobel5x5HorKernel;
-class NESobel5x5VertKernel;
-class NEFillBorderKernel;
-
-/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NESobel5x5HorKernel
- * -# @ref NESobel5x5VertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NESobel5x5 : public IFunction
-{
-public:
- /** Default constructor */
- NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5(const NESobel5x5 &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5 &operator=(const NESobel5x5 &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NESobel5x5(NESobel5x5 &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NESobel5x5 &operator=(NESobel5x5 &&) = delete;
- /** Default destructor */
- ~NESobel5x5();
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function memory group */
- std::unique_ptr<NESobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
- std::unique_ptr<NESobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */
- Tensor _tmp_x; /**< Temporary buffer for Sobel X */
- Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
- std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
-};
-}
-#endif /*ARM_COMPUTE_NESOBEL5x5_H */
diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h
deleted file mode 100644
index 7395bb0198..0000000000
--- a/arm_compute/runtime/NEON/functions/NESobel7x7.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL7x7_H
-#define ARM_COMPUTE_NESOBEL7x7_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NESobel7x7HorKernel;
-class NESobel7x7VertKernel;
-class NEFillBorderKernel;
-
-/** Basic function to execute sobel 7x7 filter. This function calls the following NEON kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NESobel7x7HorKernel
- * -# @ref NESobel7x7VertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NESobel7x7 : public IFunction
-{
-public:
- /** Default constructor */
- NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7(const NESobel7x7 &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7 &operator=(const NESobel7x7 &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NESobel7x7(NESobel7x7 &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NESobel7x7 &operator=(NESobel7x7 &&) = delete;
- /** Default destructor */
- ~NESobel7x7();
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32.
- * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function memory group */
- std::unique_ptr<NESobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
- std::unique_ptr<NESobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */
- Tensor _tmp_x; /**< Temporary buffer for Sobel X */
- Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
- std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
-};
-}
-#endif /*ARM_COMPUTE_NESOBEL7x7_H */
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 40fa38afde..1787de6237 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,35 +24,18 @@
#ifndef ARM_COMPUTE_NESOFTMAXLAYER_H
#define ARM_COMPUTE_NESOFTMAXLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
#include <memory>
namespace arm_compute
{
class ITensor;
-class NELogits1DMaxKernel;
-template <bool IS_LOG>
-class NELogits1DSoftmaxKernel;
-class NEFillBorderKernel;
+class ITensorInfo;
-/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
- *
- * Softmax is calculated by :
- * @f[ out = exp((x - max(x)) * beta) / sum(exp((x - max(x)) * beta)) @f]
- *
- * Log Softmax is calculated by :
- * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
- *
- * This function runs the following function/kernels:
- * -# If axis is not 0:
- * -# @ref NEPermute
- * -# @ref NEFillBorderKernel
- * -# @ref NELogits1DMaxKernel
- * -# @ref NELogits1DSoftmaxKernel
- */
+/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. */
template <bool IS_LOG = false>
class NESoftmaxLayerGeneric : public IFunction
{
@@ -62,17 +45,28 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NESoftmaxLayerGeneric(const NESoftmaxLayerGeneric &) = delete;
/** Default move constructor */
- NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&) = default;
+ NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete;
/** Default move assignment operator */
- NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default;
+ NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&);
/** Default destructor */
~NESoftmaxLayerGeneric();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a
- * multiple of the internal processing block size, @ref NEFillBorderKernel replicates the
+ * multiple of the internal processing block size, @ref NEFillBorder replicates the
* last value of each row to the nearest multiple.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] beta (Optional) A scaling factor for the exponent.
@@ -96,17 +90,8 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- NEPermute _permute_input;
- NEPermute _permute_output;
- std::unique_ptr<NELogits1DMaxKernel> _max_kernel;
- std::unique_ptr<NELogits1DSoftmaxKernel<IS_LOG>> _softmax_kernel;
- std::unique_ptr<NEFillBorderKernel> _fill_border_kernel;
- Tensor _max;
- Tensor _tmp;
- Tensor _input_permuted;
- Tensor _output_permuted;
- bool _needs_permute;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
using NESoftmaxLayer = NESoftmaxLayerGeneric<false>;
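
The Impl-based rewrite keeps the public interface stable; a minimal sketch (the 1000-class logits shape is an illustrative assumption, and the log variant uses the IS_LOG = true instantiation of the same template):

    #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void softmax_sketch()
    {
        Tensor logits, probs;
        logits.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));
        probs.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));

        NESoftmaxLayer softmax;
        softmax.configure(&logits, &probs, 1.f /* beta */, 0 /* axis */);
        logits.allocator()->allocate();
        probs.allocator()->allocate();
        softmax.run();
    }
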
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index 62af092c40..5dee61a4a8 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_NESPACETOBATCHLAYER_H
#define ARM_COMPUTE_NESPACETOBATCHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
#include <memory>
namespace arm_compute
@@ -34,11 +34,11 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
class NESpaceToBatchLayerKernel;
-class NEMemsetKernel;
+class NEFill;
-/** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions:
+/** Basic function to spatially divide a tensor. This function calls the following kernels/functions:
*
- * -# @ref NEMemsetKernel
+ * -# @ref NEFill
* -# @ref NESpaceToBatchLayerKernel
*/
class NESpaceToBatchLayer : public IFunction
@@ -58,6 +58,15 @@ public:
~NESpaceToBatchLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:---------|:---------|:---------|:---------|
+ * |All |S32 |S32 |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32
* @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32
@@ -73,7 +82,12 @@ public:
* @param[in] padding_right The padding at the end of every dimension of the output tensor.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
+ void configure(const ITensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
@@ -83,7 +97,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *block_shape,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings)
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
@@ -95,14 +112,19 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ const ITensorInfo *output);
// Inherited methods overridden:
void run() override;
private:
std::unique_ptr<NESpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- std::unique_ptr<NEMemsetKernel> _memset_kernel; /**< Memset kernel to run */
+ std::unique_ptr<NEFill> _fill_f; /**< Fill function to run */
bool _has_padding; /**< Flag to check if the output has padding */
};
} // namespace arm_compute
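
A minimal sketch of the static-block-shape overload: an NCHW 8x8 input with a 2x2 block and no padding grows the batch from 1 to 4 (shapes are illustrative assumptions):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void space_to_batch_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 1U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 4U), 1, DataType::F32));

        NESpaceToBatchLayer s2b;
        s2b.configure(&src, 2 /* block_shape_x */, 2 /* block_shape_y */,
                      Size2D(0, 0) /* padding_left */, Size2D(0, 0) /* padding_right */, &dst);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        s2b.run();
    }
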
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
index 1e7aae215d..1820cb8f6b 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,10 +35,7 @@ class ITensor;
class ITensorInfo;
class NESpaceToDepthLayerKernel;
-/** This function calls the following NEON kernels/functions:
- *
- * -# @ref NESpaceToDepthLayerKernel
- */
+/** Basic function to run @ref NESpaceToDepthLayerKernel. */
class NESpaceToDepthLayer : public IFunction
{
public:
@@ -56,6 +53,15 @@ public:
~NESpaceToDepthLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value
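
A minimal sketch: block shape 2 turns an 8x8x1 input into 4x4x4, moving spatial blocks into the depth dimension (shapes are illustrative assumptions):

    #include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void space_to_depth_sketch()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 1U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 4U, 1U), 1, DataType::F32));

        NESpaceToDepthLayer s2d;
        s2d.configure(&src, &dst, 2 /* block_shape */);
        src.allocator()->allocate();
        dst.allocator()->allocate();
        s2d.run();
    }
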
diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h
index ede5ecf65a..36358a7094 100644
--- a/arm_compute/runtime/NEON/functions/NESplit.h
+++ b/arm_compute/runtime/NEON/functions/NESplit.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,6 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
-
#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NESlice.h"
@@ -40,6 +39,18 @@ namespace arm_compute
class NESplit : public CPPSplit<NESlice>
{
public:
+ /** NESplit
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
+ */
+
// Inherited methods overridden:
void run() override;
};
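
NESplit inherits its configure/validate interface from CPPSplit<NESlice>; a minimal sketch splitting an 8x4 tensor into two 4x4 halves along axis 0 (shapes are illustrative assumptions):

    #include "arm_compute/runtime/NEON/functions/NESplit.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <vector>

    using namespace arm_compute;

    void split_sketch()
    {
        Tensor src, out0, out1;
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        out0.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        out1.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));

        NESplit split;
        std::vector<ITensor *> outputs = {&out0, &out1};
        split.configure(&src, outputs, 0 /* axis */);
        src.allocator()->allocate();
        out0.allocator()->allocate();
        out1.allocator()->allocate();
        split.run();
    }
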
diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h
index f6fa4f2eb3..98dacde0c1 100644
--- a/arm_compute/runtime/NEON/functions/NEStackLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NESTACKLAYER_H
-#define ARM_COMPUTE_NESTACKLAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
@@ -58,6 +58,14 @@ public:
~NEStackLayer();
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported input tensor rank: up to 4
*
* @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: All
@@ -83,9 +91,8 @@ public:
void run() override;
private:
- std::vector<ITensor *> _input;
- std::vector<std::unique_ptr<NEStackLayerKernel>> _stack_kernels;
- unsigned int _num_inputs;
+ std::unique_ptr<NEStackLayerKernel> _stack_kernel;
+ bool _is_prepared;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESTACKLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
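
The consolidation to a single kernel does not change usage; a minimal sketch stacking two 4x4 tensors along a new leading axis (shapes are illustrative assumptions):

    #include "arm_compute/runtime/NEON/functions/NEStackLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <vector>

    using namespace arm_compute;

    void stack_sketch()
    {
        Tensor a, b, out;
        a.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(2U, 4U, 4U), 1, DataType::F32)); // new axis 0, size 2

        NEStackLayer stack;
        std::vector<ITensor *> inputs = {&a, &b};
        stack.configure(inputs, 0 /* axis */, &out);
        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        stack.run();
    }
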
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
index f9c94f5301..fa1113ffec 100644
--- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h
+++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,18 +32,37 @@ namespace arm_compute
// Forward Declarations
class ITensor;
-namespace experimental
-{
/** Basic function to run @ref NEStridedSliceKernel */
-class NEStridedSlice : public INEOperator
+class NEStridedSlice : public IFunction
{
public:
+ /** Default Constructor */
+ NEStridedSlice();
+ /** Default Destructor */
+ ~NEStridedSlice();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSlice(const NEStridedSlice &) = delete;
+ /** Default move constructor */
+ NEStridedSlice(NEStridedSlice &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSlice &operator=(const NEStridedSlice &) = delete;
+ /** Default move assignment operator */
+ NEStridedSlice &operator=(NEStridedSlice &&);
+
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor info. Data type supported: All
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] input Source tensor. Data type supported: All
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
 * @param[in]  starts            The starts of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
 * @param[in]  ends              The ends of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
 * @param[in]  strides           The strides of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
@@ -52,9 +71,14 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const ITensorInfo *input, ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ void configure(const ITensor *input,
+ ITensor *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice
*
@@ -70,35 +94,35 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-} // namespace experimental
+namespace experimental
+{
/** Basic function to run @ref NEStridedSliceKernel */
-class NEStridedSlice : public IFunction
+class NEStridedSlice : public INEOperator
{
public:
- /** Default Constructor */
- NEStridedSlice();
- /** Default Destructor */
- ~NEStridedSlice();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSlice(const NEStridedSlice &) = delete;
- /** Default move constructor */
- NEStridedSlice(NEStridedSlice &&);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSlice &operator=(const NEStridedSlice &) = delete;
- /** Default move assignment operator */
- NEStridedSlice &operator=(NEStridedSlice &&);
-
/** Configure kernel
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
 * @param[in]  starts            The starts of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
 * @param[in]  ends              The ends of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
 * @param[in]  strides           The strides of the dimensions of the input tensor to be sliced. The length must be equal to rank(input).
@@ -107,9 +131,14 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const ITensor *input, ITensor *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ void configure(const ITensorInfo *input,
+ ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice
*
@@ -125,16 +154,15 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- struct Impl;
- std::unique_ptr<Impl> _impl;
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NE_STRIDED_SLICE_H */
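A usage sketch for the function-level NEStridedSlice interface above, assuming an illustrative 4x4 F32 source sliced with stride 2 along both dimensions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));

        // Equivalent of src[0:4:2, 0:4:2]: every other element in x and y.
        const Coordinates starts(0, 0);
        const Coordinates ends(4, 4);
        const BiStrides   strides(2, 2);

        NEStridedSlice slice;
        slice.configure(&src, &dst, starts, ends, strides);

        src.allocator()->allocate();
        dst.allocator()->allocate(); // 2x2 output inferred by configure()

        slice.run();
        return 0;
    }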
diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h
deleted file mode 100644
index 03674cd297..0000000000
--- a/arm_compute/runtime/NEON/functions/NETableLookup.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETABLELOOKUP_H
-#define ARM_COMPUTE_NETABLELOOKUP_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ILut;
-
-/** Basic function to run @ref NETableLookupKernel */
-class NETableLookup : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input First tensor input. Data types supported: U8/S16
- * @param[in] lut Input lookup table.
- * @param[out] output Output tensor. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ILut *lut, ITensor *output);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETABLELOOKUP_H */
diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h
deleted file mode 100644
index 9860abf835..0000000000
--- a/arm_compute/runtime/NEON/functions/NEThreshold.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETHRESHOLD_H
-#define ARM_COMPUTE_NETHRESHOLD_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-class ITensorInfo;
-
-/** Basic function to run @ref NEThresholdKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEThreshold : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the function's source, destination, thresholds and threshold type
- *
- * @param[in] input First tensor input. Data type supported: U8.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold
- * @param[in] false_value Value to assign when the condition is false
- * @param[in] true_value value to assign when the condition is true
- * @param[in] type Thresholding type. Can either be BINARY or RANGE.
- * @param[in] upper Upper threshold. Only used with RANGE thresholding
- */
- ARM_COMPUTE_DEPRECATED_REL(20.08)
- void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0,
- ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
- /** Initialise the function's source, destination, thresholds and threshold type
- *
- * @param[in] input First tensor input. Data type supported: U8.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] info Threshold descriptor
- */
- void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEThreshold
- *
- * @param[in] input First tensor input. Data type supported: U8.
- * @param[in] output Output tensor. Data type supported: U8.
- * @param[in] info Threshold descriptor.
- *
- * @return A status, containing an error code in case of failure
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETHRESHOLD_H */
diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h
index d5ce76c9cf..001a0a4128 100644
--- a/arm_compute/runtime/NEON/functions/NETile.h
+++ b/arm_compute/runtime/NEON/functions/NETile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NETILE_H
#define ARM_COMPUTE_NETILE_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -39,6 +38,14 @@ class NETile : public INESimpleFunctionNoBorder
public:
/** Set the source and destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data type supported: All.
* @param[out] output Destination tensor. Same as @p input
* @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
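A usage sketch for NETile's configure() documented above, assuming an illustrative 2x2 F32 input replicated twice along x and three times along y (Multiples is the library's per-dimension replication vector):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NETile.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::F32));

        // One multiple per dimension of the input.
        const Multiples multiples = {2, 3};

        NETile tile;
        tile.configure(&src, &dst, multiples);

        src.allocator()->allocate();
        dst.allocator()->allocate(); // 4x6 output after tiling

        tile.run();
        return 0;
    }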
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 2651bdd727..5d2d1f1b01 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,23 +25,42 @@
#define ARM_COMPUTE_NETRANSPOSE_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
class ITensorInfo;
-/** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel:
- *
- * -# @ref NETransposeKernel
- *
- */
-class NETranspose : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuTransposeKernel */
+class NETranspose : public IFunction
{
public:
+ /** Default Constructor */
+ NETranspose();
+ /** Default Destructor */
+ ~NETranspose();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETranspose(const NETranspose &) = delete;
+ /** Default move constructor */
+ NETranspose(NETranspose &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETranspose &operator=(const NETranspose &) = delete;
+ /** Default move assignment operator */
+ NETranspose &operator=(NETranspose &&) = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
@@ -54,7 +73,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-
#endif /* ARM_COMPUTE_NETRANSPOSE_H */
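A usage sketch for the pimpl-based NETranspose above, assuming an illustrative 4x2 F32 matrix:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NETranspose.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 2U), 1, DataType::F32));

        NETranspose transpose;
        transpose.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate(); // 2x4 output after transposition

        transpose.run();
        return 0;
    }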
diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h
index c8e85115f7..e1af96d08d 100644
--- a/arm_compute/runtime/NEON/functions/NEUnstack.h
+++ b/arm_compute/runtime/NEON/functions/NEUnstack.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,6 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include <memory>
@@ -57,6 +56,14 @@ public:
~NEUnstack() = default;
/** Set the input, output and unstacking axis.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input A tensor to be unstacked. Data type supported: All.
* @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
* Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
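A usage sketch for NEUnstack's configure() above, assuming an illustrative 2x3x2 F32 input split into two slices along axis 2 (per the note above, the output vector's length fixes the number of slices):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEUnstack.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <vector>

    using namespace arm_compute;

    int main()
    {
        Tensor src, s0, s1;
        src.allocator()->init(TensorInfo(TensorShape(2U, 3U, 2U), 1, DataType::F32));

        // Two output tensors -> two slices taken from axis 2.
        std::vector<ITensor *> outputs{&s0, &s1};

        NEUnstack unstack;
        unstack.configure(&src, outputs, 2);

        src.allocator()->allocate();
        s0.allocator()->allocate(); // each slice is 2x3
        s1.allocator()->allocate();

        unstack.run();
        return 0;
    }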
diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h
deleted file mode 100644
index 0aedb87aa2..0000000000
--- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWARPAFFINE_H
-#define ARM_COMPUTE_NEWARPAFFINE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEWarpAffineKernel
- *
- * @deprecated This function is deprecated and will be removed in release 20.02
- *
-*/
-class NEWarpAffine : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] matrix The perspective matrix. Must be 2x3 of type float.
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEWARPAFFINE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
deleted file mode 100644
index 31a1477dca..0000000000
--- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWARPPERSPECTIVE_H
-#define ARM_COMPUTE_NEWARPPERSPECTIVE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEWarpPerspectiveKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class NEWarpPerspective : public INESimpleFunction
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEWARPPERSPECTIVE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 6b61e7031b..6caa2aeb59 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,17 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-
-#include "arm_compute/runtime/IFunction.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -40,13 +35,12 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
-class ICPPKernel;
-/** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
- * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
- * -# @ref NEWinogradLayerTransformInputKernel
- * -# @ref NEWinogradLayerTransformOutputKernel
- * -# @ref NEGEMMAssemblyDispatch
+/** Basic function to simulate a convolution layer. This function calls the following kernels:
+ *
+ * -# @ref cpu::CpuWinogradConv2dTransformInputKernel
+ * -# @ref cpu::CpuWinogradConv2dTransformOutputKernel
+ * -# @ref cpu::CpuGemmAssemblyDispatch
* -# @ref CPPPermute (three times: weights, input and output)
*
* @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true
@@ -56,20 +50,35 @@ class NEWinogradConvolutionLayer : public IFunction
public:
/** Constructor */
NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = delete;
- /** Default destructor */
- ~NEWinogradConvolutionLayer() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = default;
+ /** Destructor */
+ ~NEWinogradConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
+ * Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for Fp32
+ * -> 3x3 for Fp16
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -78,62 +87,35 @@ public:
 * @param[in]  input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
 * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
 *                              available, which can reduce accuracy. Default is false
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- bool enable_fast_math = false);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
void prepare() override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
+ /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradConvolutionLayer
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
- * available which may introduce a drop of accuracy as well. Default is false
+ * Similar to @ref NEWinogradConvolutionLayer::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_function;
- std::unique_ptr<ICPPKernel> _transform_input_kernel;
- std::unique_ptr<ICPPKernel> _transform_output_kernel;
- std::unique_ptr<ICPPKernel> _transform_weights_kernel;
- NEActivationLayer _activationlayer_function;
-
- CPPPermute _permute_input;
- CPPPermute _permute_weights;
- CPPPermute _permute_output;
- Tensor _input_transformed;
- Tensor _output_transformed;
- Tensor _input_workspace;
- Tensor _output_workspace;
- Tensor _kernel_storage;
- Tensor _input_nhwc;
- Tensor _output_nhwc;
- Tensor _weights_hwio;
- const ITensor *_input;
- const ITensor *_weights;
- ITensor *_output;
- bool _is_prepared;
- bool _is_activationlayer_enabled;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
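A usage sketch for the reworked NEWinogradConvolutionLayer above, assuming an illustrative 8x8x16 F32 input and 3x3 kernels with 32 output feature maps (unit strides with padding 1, keeping the 8x8 spatial size):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

        NEWinogradConvolutionLayer conv;
        // Winograd supports unit strides only (see conv_info note above).
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        conv.run(); // the first run() also performs the one-off weight transformation
        return 0;
    }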
diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
deleted file mode 100644
index 4c9a5bf6e4..0000000000
--- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEYOLOLAYER_H
-#define ARM_COMPUTE_NEYOLOLAYER_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ITensorInfo;
-
-/** Basic function to run @ref NEYOLOLayerKernel */
-class NEYOLOLayer : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer parameters.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- */
- void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayer
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEYOLOLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
deleted file mode 100644
index 7f63717b02..0000000000
--- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-namespace arm_compute
-{
-/** Depthwise convolution assembly kernel glue */
-class NEDepthwiseConvolutionAssemblyDispatch : public IFunction
-{
-public:
- /** Default constructor
- *
- * @param[in,out] memory_manager Memory manager to use
- */
- NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move constructor */
- NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move assignment operator */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Default destructor */
- ~NEDepthwiseConvolutionAssemblyDispatch();
- /** Initialize the function's source, destination, kernels and border_size.
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return An error status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Check if the optimized kernel can be used for the given kernel sizes and strides
- *
- * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC
- *
- * @param[in] input Input tensor info.
- * @param[in] weights Weights tensor info.
- * @param[in] conv_info Convolution layer metadata.
- * @param[in] depth_multiplier (Optional) Depth multiplier to be used.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed.
- */
- static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1));
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- struct LocalImpl;
-
-private:
- MemoryGroup _memory_group;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_bias;
- ITensor *_output;
- Tensor _packed_weights;
- Tensor _workspace;
- bool _is_prepared;
- std::unique_ptr<LocalImpl> _pImpl;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */