Diffstat (limited to 'arm_compute/runtime/NEON/functions')
-rw-r--r--  arm_compute/runtime/NEON/functions/NEActivationLayer.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEAddMulAdd.h | 115
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h | 24
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArithmeticAddition.h | 19
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h | 18
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h | 22
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h | 24
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NECast.h | 7
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h | 1
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConcatenateLayer.h | 6
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConv3D.h | 100
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h | 52
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolutionLayer.h | 71
-rw-r--r--  arm_compute/runtime/NEON/functions/NECopy.h | 3
-rw-r--r--  arm_compute/runtime/NEON/functions/NECropResize.h | 18
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h | 74
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h | 6
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h | 24
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 69
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h | 22
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h | 16
-rw-r--r--  arm_compute/runtime/NEON/functions/NEElementwiseOperations.h | 56
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT1D.h | 3
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT2D.h | 3
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h | 21
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFill.h | 3
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFillBorder.h | 6
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFloor.h | 3
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 117
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h | 27
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMM.h | 100
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConv2d.h | 14
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 351
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 107
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h | 242
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGather.h | 13
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h | 13
-rw-r--r--  arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h | 2
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayer.h | 58
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h | 95
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMatMul.h | 145
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h | 17
-rw-r--r--  arm_compute/runtime/NEON/functions/NENormalizationLayer.h | 16
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPadLayer.h | 21
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPermute.h | 3
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h | 28
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPooling3dLayer.h | 96
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPoolingLayer.h | 9
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h | 5
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 362
-rw-r--r--  arm_compute/runtime/NEON/functions/NERNNLayer.h | 15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIAlignLayer.h | 5
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h | 9
-rw-r--r--  arm_compute/runtime/NEON/functions/NERange.h | 1
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReduceMean.h | 15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReductionOperation.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NERemap.h | 67
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReorderLayer.h | 94
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReverse.h | 40
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScale.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NESlice.h | 6
-rw-r--r--  arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h | 21
-rw-r--r--  arm_compute/runtime/NEON/functions/NESplit.h | 1
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStackLayer.h | 13
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStridedSlice.h | 44
-rw-r--r--  arm_compute/runtime/NEON/functions/NETile.h | 3
-rw-r--r--  arm_compute/runtime/NEON/functions/NETranspose.h | 5
-rw-r--r--  arm_compute/runtime/NEON/functions/NEUnstack.h | 1
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h | 106
71 files changed, 1778 insertions, 1339 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index b39a8d7701..5584fdc783 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_NEACTIVATIONLAYER_H
#define ARM_COMPUTE_NEACTIVATIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include <memory>
@@ -101,5 +101,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-} // namespace arm_computes
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEAddMulAdd.h b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h
new file mode 100644
index 0000000000..6c65c055dd
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
+#define ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+class ActivationLayerInfo;
+
+/** Function to compute Add+Mul+Add fused operation */
+class NEAddMulAdd : public IFunction
+{
+public:
+ /** Constructor */
+ NEAddMulAdd(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAddMulAdd(const NEAddMulAdd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAddMulAdd(NEAddMulAdd &&) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAddMulAdd &operator=(const NEAddMulAdd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAddMulAdd &operator=(NEAddMulAdd &&) = delete;
+ /** Destructor */
+ ~NEAddMulAdd();
+ /** Initialize the function's inputs and outputs.
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * Valid data type configurations:
+ * |input1 |input2 |bn_mul |bn_add |add_output |final_output |
+ * |:--------------|:--------------|:--------------|:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |F32 |F32 |
+ *
+ * This is what this composite function (tailored for add followed by a batch norm operation) does:
+ * add_output <- input1 + input2 (add)
+ * final_output <- add_output * bn_mul + bn_add (batch norm = mul+add)
+ *
+ * @param[in] input1 First tensor input. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input.
+ * @param[in] bn_mul The multiplication coefficient on the feature dimension. Data types supported: Same as @p input.
+ * It's one dimensional tensor with size equal to the feature maps [FM]
+ * @param[in] bn_add The addition coefficient on the feature dimension. Data types supported: Same as @p input.
+ * It's one dimensional tensor with size equal to the feature maps [FM]
+ * @param[out] add_output Output of the first add. Data type supported: Same as @p input.
+ * @param[out] final_output Output of the add+mul+add+act composite operation. Data type supported: Same as @p input.
+ * @param[in] policy Policy to handle overflow
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ */
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *bn_mul,
+ ITensor *bn_add,
+ ITensor *add_output,
+ ITensor *final_output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEAddMulAdd
+ *
+ * Similar to @ref NEAddMulAdd::configure() except the arguments are @ref ITensorInfo * instead of @ref ITensor *
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *bn_mul,
+ const ITensorInfo *bn_add,
+ const ITensorInfo *add_output,
+ const ITensorInfo *final_output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD */
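For context (not part of the patch): a minimal usage sketch of the new NEAddMulAdd fused operator, following the configure() documentation above. Tensor shapes, the [FM, batch] layout, and the fused ReLU are illustrative assumptions, not requirements of the API.

    // Sketch only: final_output = act((input1 + input2) * bn_mul + bn_add)
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/function_info/ActivationLayerInfo.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h"

    using namespace arm_compute;

    int main()
    {
        const TensorShape data_shape(32U, 128U); // [FM, batch], illustrative
        const TensorShape fm_shape(32U);         // one coefficient per feature map

        Tensor input1, input2, bn_mul, bn_add, add_output, final_output;
        input1.allocator()->init(TensorInfo(data_shape, 1, DataType::F32));
        input2.allocator()->init(TensorInfo(data_shape, 1, DataType::F32));
        bn_mul.allocator()->init(TensorInfo(fm_shape, 1, DataType::F32));
        bn_add.allocator()->init(TensorInfo(fm_shape, 1, DataType::F32));
        add_output.allocator()->init(TensorInfo(data_shape, 1, DataType::F32));
        final_output.allocator()->init(TensorInfo(data_shape, 1, DataType::F32));

        NEAddMulAdd fused;
        fused.configure(&input1, &input2, &bn_mul, &bn_add, &add_output, &final_output,
                        ConvertPolicy::SATURATE,
                        ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        for (Tensor *t : {&input1, &input2, &bn_mul, &bn_add, &add_output, &final_output})
            t->allocator()->allocate();

        // Fill input1, input2, bn_mul and bn_add here, then:
        fused.run();
        return 0;
    }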
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 4392de7b28..3bb50a0f90 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_NEARGMINMAXLAYER_H
#define ARM_COMPUTE_NEARGMINMAXLAYER_H
-#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
@@ -33,7 +31,6 @@
namespace arm_compute
{
class ITensor;
-
/** Function to calculate the index of the minimum or maximum values in a
* tensor based on an axis.
*
@@ -68,13 +65,13 @@ public:
* - All
*
* Valid data type configurations:
- * |src |dst |
- * |:--------------|:----------|
- * |QASYMM8 |U32, S32 |
- * |QASYMM8_SIGNED |U32, S32 |
- * |S32 |U32, S32 |
- * |F16 |U32, S32 |
- * |F32 |U32, S32 |
+ * |src |dst |
+ * |:--------------|:-------------|
+ * |QASYMM8 |U32, S32 |
+ * |QASYMM8_SIGNED |U32, S32 |
+ * |S32 |U32, S32, S64 |
+ * |F16 |U32, S32 |
+ * |F32 |U32, S32 |
*
* @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
@@ -86,7 +83,7 @@ public:
*
* @param[in] input Input source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
- * @param[in] output Output source tensor info. Data types supported: U32/S32.
+ * @param[in] output Output source tensor info. Data types supported: U32/S32/S64.
* @param[in] op Operation to perform: min or max
*
* @return a status
@@ -97,7 +94,8 @@ public:
void run() override;
private:
- std::unique_ptr<NEReductionOperation> _reduction_function;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEARGMINMAXLAYER_H */
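A hedged usage sketch of NEArgMinMaxLayer based on the documentation above: argmax along axis 0 of a 2-D F32 tensor, writing U32 indices. Shapes and the validate-before-configure pattern are illustrative; the reduced axis is assumed to be dropped from the output shape.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(100U, 16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::U32)); // axis 0 removed

        // Optional: check the configuration before committing any memory
        const Status st = NEArgMinMaxLayer::validate(src.info(), 0, dst.info(),
                                                     ReductionOperation::ARG_IDX_MAX);
        if (!bool(st))
            return 1;

        NEArgMinMaxLayer argmax;
        argmax.configure(&src, 0, &dst, ReductionOperation::ARG_IDX_MAX);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        argmax.run();
        return 0;
    }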
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index b8e46ff36e..73a43dbc44 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,9 @@
#define ARM_COMPUTE_NEARITHMETICADDITION_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
@@ -62,9 +64,6 @@ public:
* |QSYMM16 |QSYMM16 |QASYMM16 |
* |QSYMM16 |QSYMM16 |S32 |
* |U8 |U8 |U8 |
- * |U8 |U8 |S16 |
- * |U8 |S16 |S16 |
- * |S16 |U8 |S16 |
* |S16 |S16 |S16 |
* |S32 |S32 |S32 |
* |F16 |F16 |F16 |
@@ -76,7 +75,11 @@ public:
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
*
* @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
@@ -87,7 +90,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
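A minimal sketch of NEArithmeticAddition on two F32 tensors of identical shape. Names and shapes are illustrative; act_info is left at its default because the documentation above marks fused activation as currently unsupported.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"

    using namespace arm_compute;

    int main()
    {
        const TensorShape shape(224U, 224U, 3U);
        Tensor a, b, sum;
        a.allocator()->init(TensorInfo(shape, 1, DataType::F32));
        b.allocator()->init(TensorInfo(shape, 1, DataType::F32));
        sum.allocator()->init(TensorInfo(shape, 1, DataType::F32));

        NEArithmeticAddition add;
        add.configure(&a, &b, &sum, ConvertPolicy::SATURATE); // element-wise a + b

        a.allocator()->allocate();
        b.allocator()->allocate();
        sum.allocator()->allocate();

        // Fill a and b here, then:
        add.run();
        return 0;
    }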
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index 0c72e946f6..3e4f6356c5 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INEOperator.h"
@@ -68,9 +69,6 @@ public:
* |QSYMM16 |QSYMM16 |QASYMM16 |
* |QSYMM16 |QSYMM16 |S32 |
* |U8 |U8 |U8 |
- * |U8 |U8 |S16 |
- * |U8 |S16 |S16 |
- * |S16 |U8 |S16 |
* |S16 |S16 |S16 |
* |S32 |S32 |S32 |
* |F16 |F16 |F16 |
@@ -82,7 +80,11 @@ public:
* @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
*
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32
@@ -93,7 +95,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ConvertPolicy policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index ec00fbdbf2..99e2dcadbb 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -81,7 +81,13 @@ public:
* @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
*/
- void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f,
+ void configure(ITensor *input,
+ ITensor *output,
+ const ITensor *mean,
+ const ITensor *var,
+ const ITensor *beta = nullptr,
+ const ITensor *gamma = nullptr,
+ float epsilon = 0.001f,
ActivationLayerInfo act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayer
*
@@ -98,10 +104,14 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *mean,
+ const ITensorInfo *var,
+ const ITensorInfo *beta = nullptr,
+ const ITensorInfo *gamma = nullptr,
+ float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -109,5 +119,5 @@ public:
private:
std::unique_ptr<NEBatchNormalizationLayerKernel> _norm_kernel; /**< Batch normalization layer kernel */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */
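A hedged sketch of inference-time batch normalisation using the configure() overload above, on an NCHW F32 feature map with a fused ReLU. Shapes are illustrative; mean, var, beta, and gamma are 1-D tensors with one value per channel.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/function_info/ActivationLayerInfo.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst, mean, var, beta, gamma;
        src.allocator()->init(TensorInfo(TensorShape(28U, 28U, 64U, 1U), 1, DataType::F32)); // [W, H, C, N]
        dst.allocator()->init(TensorInfo(TensorShape(28U, 28U, 64U, 1U), 1, DataType::F32));
        for (Tensor *t : {&mean, &var, &beta, &gamma})
            t->allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32)); // one value per channel

        NEBatchNormalizationLayer bn;
        bn.configure(&src, &dst, &mean, &var, &beta, &gamma, 1e-5f,
                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        for (Tensor *t : {&src, &dst, &mean, &var, &beta, &gamma})
            t->allocator()->allocate();

        // Fill src and the per-channel parameters here, then:
        bn.run();
        return 0;
    }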
diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
index 810bf81a22..ebed0bea29 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYER_H
#define ARM_COMPUTE_NEBATCHTOSPACELAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
@@ -64,7 +63,10 @@ public:
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
* @param[out] output Tensor output. Data types supported: same as @p input
+ *
+ * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release
*/
+ ARM_COMPUTE_DEPRECATED_REL(23.05)
void configure(const ITensor *input, const ITensor *block_shape, ITensor *output);
/** Set the input and output tensors. (Static block shape).
*
@@ -72,8 +74,13 @@ public:
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
* @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*/
- void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output);
+ void configure(const ITensor *input,
+ int32_t block_shape_x,
+ int32_t block_shape_y,
+ ITensor *output,
+ const CropInfo &crop_info = CropInfo{});
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
@@ -81,7 +88,9 @@ public:
* @param[out] output Tensor output info. Data types supported: same as @p input
*
* @return a status
+ * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release
*/
+ ARM_COMPUTE_DEPRECATED_REL(23.05)
static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer (Static block shape).
*
@@ -89,10 +98,15 @@ public:
* @param[in] block_shape_x Block shape x value.
* @param[in] block_shape_y Block shape y value.
* @param[out] output Tensor output info. Data types supported: same as @p input
+ * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, int32_t block_shape_x, int32_t block_shape_y, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ int32_t block_shape_x,
+ int32_t block_shape_y,
+ const ITensorInfo *output,
+ const CropInfo &crop_info = CropInfo{});
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYER_H */
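A sketch of the static-block-shape overload introduced above, which is the non-deprecated path: NCHW F32, block 2x2, default CropInfo. Concrete shapes are illustrative; the batch dimension is folded back into width and height.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U, 4U), 1, DataType::F32));   // [W, H, C, N]
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 16U, 1U), 1, DataType::F32)); // N folded into W/H

        NEBatchToSpaceLayer b2s;
        b2s.configure(&src, 2, 2, &dst, CropInfo{}); // static block shape, no cropping

        src.allocator()->allocate();
        dst.allocator()->allocate();
        b2s.run();
        return 0;
    }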
diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
index 2a196a2de5..aa41fc0df2 100644
--- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
+++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
@@ -57,7 +57,8 @@ public:
*
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
*/
- void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
+ void
+ configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransform
*
@@ -71,7 +72,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
+ static Status validate(const ITensorInfo *boxes,
+ const ITensorInfo *pred_boxes,
+ const ITensorInfo *deltas,
+ const BoundingBoxTransformInfo &info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEBOUNDINGBOXTRANSFORM_H */
diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h
index 30499f5ecf..43cae777f6 100644
--- a/arm_compute/runtime/NEON/functions/NECast.h
+++ b/arm_compute/runtime/NEON/functions/NECast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NECAST_H
#define ARM_COMPUTE_NECAST_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -85,7 +84,7 @@ public:
*
* @return a status
*/
- static Status validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy);
// Inherited methods overridden
void run() override;
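A minimal NECast sketch converting U8 data to F32. The shape is illustrative; the ConvertPolicy argument has no default, so it must be passed even for widening conversions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NECast.h"

    using namespace arm_compute;

    int main()
    {
        Tensor u8_in, f32_out;
        u8_in.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        f32_out.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::F32));

        NECast cast;
        cast.configure(&u8_in, &f32_out, ConvertPolicy::SATURATE);

        u8_in.allocator()->allocate();
        f32_out.allocator()->allocate();
        cast.run();
        return 0;
    }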
diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
index 8888efec4f..bc19e1a4af 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
@@ -46,6 +46,7 @@ public:
*
* Valid data layouts:
* - NCHW
+ * - NHWC
*
* Valid data type configurations:
* |src |dst |
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index dd1c709d76..1600f85488 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NECONCATENATELAYER_H
#define ARM_COMPUTE_NECONCATENATELAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -87,7 +86,8 @@ public:
*
* @return a status
*/
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
+ static Status
+ validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEConv3D.h b/arm_compute/runtime/NEON/functions/NEConv3D.h
new file mode 100644
index 0000000000..525f37f3e7
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEConv3D.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECONV3D_H
+#define ARM_COMPUTE_NECONV3D_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Basic function to simulate a 3d convolution. This function calls one of the following functions:
+ * -# @ref cpu::CpuDirectConv3d
+ *
+ */
+class NEConv3D : public IFunction
+{
+public:
+ /** Constructor */
+ NEConv3D();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConv3D(const NEConv3D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConv3D &operator=(const NEConv3D &) = delete;
+ /** Default move constructor */
+ NEConv3D(NEConv3D &&) = default;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConv3D &operator=(NEConv3D &&) = default;
+ /** Default destructor */
+ ~NEConv3D();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NDHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] input Source tensor. 4 lower dimensions represent a single input [IFM, width, height, depth],
+ * while every optional dimension from 5 and above represent a batch of inputs.
+ * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [OFM, IFM, kernel_x, kernel_y, kernel_z].
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * @param[out] output Destination tensor. 4 lower dimensions represent a single output [OFM, width, height, depth], while the rest represent batch of outputs.
+ * @param[in] conv_info Contains padding, stride, activation information described in @ref Conv3dInfo.

+ */
+ void configure(
+ ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv3dInfo &conv_info);
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to NEConv3D::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const Conv3dInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NECONV3D_H */
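For context, a hedged NEConv3D sketch in NDHWC following the documented shape orderings (input [IFM, width, height, depth, batch], weights [OFM, IFM, kx, ky, kz], biases [OFM]). The default-constructed Conv3dInfo is assumed here to mean stride 1, no padding and no fused activation; concrete sizes are illustrative.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/FunctionDescriptors.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEConv3D.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 16U, 16U, 16U, 1U), 1, DataType::F32, DataLayout::NDHWC));
        weights.allocator()->init(TensorInfo(TensorShape(16U, 8U, 3U, 3U, 3U), 1, DataType::F32, DataLayout::NDHWC));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NDHWC));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 14U, 14U, 14U, 1U), 1, DataType::F32, DataLayout::NDHWC));

        Conv3dInfo conv_info{}; // assumed defaults: stride (1,1,1), no padding, no fused activation

        NEConv3D conv3d;
        conv3d.configure(&src, &weights, &biases, &dst, conv_info);

        for (Tensor *t : {&src, &weights, &biases, &dst})
            t->allocator()->allocate();

        // Fill src, weights and biases here, then:
        conv3d.run();
        return 0;
    }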
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index 218877d421..dc6b22d717 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -24,14 +24,14 @@
#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/ITransformWeights.h"
-#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref cpu::kernels::CpuConvertFullyConnectedWeightsKernel. */
class NEConvertFullyConnectedWeights : public IFunction
@@ -65,7 +65,8 @@ public:
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
*/
- void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ void
+ configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights
*
* @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
@@ -75,7 +76,10 @@ public:
*
* @return A Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const TensorShape &original_input_shape,
+ DataLayout data_layout);
// Inherited methods overriden:
void run() override;
@@ -84,45 +88,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace weights_transformations
-{
-/** Basic function to manage @ref NEConvertFullyConnectedWeights. */
-class NEConvertFullyConnectedWeightsManaged : public ITransformWeights
-{
-public:
- void run() override
- {
- _output.allocator()->allocate();
- _func.run();
- _reshape_run = true;
- }
-
- void release() override
- {
- _output.allocator()->free();
- }
-
- ITensor *get_weights() override
- {
- return &_output;
- }
-
- uint32_t uid() override
- {
- return _uid;
- }
-
- void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
- {
- _func.configure(input, &_output, original_input_shape, data_layout);
- }
-
-private:
- static constexpr uint32_t _uid = 0x4;
- Tensor _output{};
- NEConvertFullyConnectedWeights _func{};
-};
-} // namespace weights_transformations
} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H */
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index f19aa8008b..2d07980ade 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,13 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NECONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NECONVOLUTIONLAYER_H
-
-#include "arm_compute/runtime/IFunction.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include <memory>
@@ -38,9 +38,9 @@ namespace arm_compute
class ITensor;
/** Basic function to simulate a convolution layer. This function calls one of the following functions:
- * -# @ref NEGEMMConvolutionLayer (executed only in case GEMM is required for the operation)
- * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
- * -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation)
+ * -# @ref cpu::CpuGemmConv2d (executed only in case GEMM is required for the operation)
+ * -# @ref cpu::CpuWinogradConv2d (executed only in case Winograd is required for the operation)
+ * -# @ref cpu::CpuDirectConv2d (executed only in case Direct Convolution is required for the operation)
* -# @ref NEFFTConvolutionLayer (executed only in case FFT is required for the operation)
*
*
@@ -78,12 +78,12 @@ public:
NEConvolutionLayer(const NEConvolutionLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEConvolutionLayer(NEConvolutionLayer &&) = default;
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayer(NEConvolutionLayer &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayer &operator=(NEConvolutionLayer &&) = delete;
+ NEConvolutionLayer &operator=(NEConvolutionLayer &&) = default;
/** Default destructor */
- ~NEConvolutionLayer() = default;
+ ~NEConvolutionLayer();
/** Set the input and output tensors.
*
* Valid data layouts:
@@ -111,15 +111,23 @@ public:
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -133,7 +141,7 @@ public:
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
@@ -142,9 +150,16 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
- unsigned int num_groups = 1);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -156,7 +171,7 @@ public:
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
@@ -164,15 +179,21 @@ public:
*
* @return the Convolution Method Hint
*/
- static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ static ConvolutionMethod get_convolution_method(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- std::shared_ptr<IMemoryManager> _memory_manager;
- std::unique_ptr<IFunction> _function; /**< Function to run */
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H
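A hedged usage sketch of NEConvolutionLayer (F32, NCHW, 3x3 kernel, same padding). get_convolution_method() exposes which backend the function would dispatch to for this configuration. Shapes are illustrative.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U, 1U), 1, DataType::F32));    // [W, H, IFM, N]
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32)); // [kx, ky, IFM, OFM]
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 32U, 1U), 1, DataType::F32));

        const PadStrideInfo conv_info(1, 1, 1, 1); // stride 1x1, pad 1x1

        const ConvolutionMethod method = NEConvolutionLayer::get_convolution_method(
            src.info(), weights.info(), dst.info(), conv_info);
        (void)method; // e.g. GEMM, WINOGRAD or DIRECT depending on shapes and target

        NEConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, conv_info);

        for (Tensor *t : {&src, &weights, &biases, &dst})
            t->allocator()->allocate();

        // Fill src, weights and biases here, then:
        conv.run();
        return 0;
    }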
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index ee02c259f4..840c03e968 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NECOPY_H
#define ARM_COMPUTE_NECOPY_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h
index 143bbbc6f1..f806762158 100644
--- a/arm_compute/runtime/NEON/functions/NECropResize.h
+++ b/arm_compute/runtime/NEON/functions/NECropResize.h
@@ -75,8 +75,13 @@ public:
* @param[in] method The policy to be used when resizing image. Default is bilinear.
* @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0.
*/
- void configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size,
- InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
+ void configure(const ITensor *input,
+ const ITensor *boxes,
+ const ITensor *box_ind,
+ ITensor *output,
+ Coordinates2D crop_size,
+ InterpolationPolicy method = InterpolationPolicy::BILINEAR,
+ float extrapolation_value = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NESlice
*
@@ -96,8 +101,13 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output,
- Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *boxes,
+ const ITensorInfo *box_ind,
+ const ITensorInfo *output,
+ Coordinates2D crop_size,
+ InterpolationPolicy method,
+ float extrapolation_value);
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 34ab0707c2..aabe42f928 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,15 +24,14 @@
#ifndef ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
-#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
-#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -76,17 +75,16 @@ class NEDeconvolutionLayer : public IFunction
public:
/** Constructor */
NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NEDeconvolutionLayer(NEDeconvolutionLayer &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains pointers) */
- NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = delete;
+ /** Default move assignment operator */
+ NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default;
/** Default destructor */
- virtual ~NEDeconvolutionLayer() = default;
+ ~NEDeconvolutionLayer() = default;
/** Set the input, weights, biases and output tensors.
*
@@ -104,25 +102,50 @@ public:
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension.
+ *                             Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for
+ * the GEMM convolution.
*
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *bias,
+ ITensor *output,
+ const PadStrideInfo &info,
+ bool enable_fast_math = false,
+ const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] bias (Optional) The biases have one dimension. Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for
+ * the GEMM convolution.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const PadStrideInfo &info,
+ bool enable_fast_math = false,
+ const WeightsInfo &weights_info = WeightsInfo());
// Inherited methods overridden:
void run() override;
@@ -140,6 +163,7 @@ private:
ITensor *_input;
PadStrideInfo _info;
bool _is_prepared;
+ bool _do_upsampling;
};
-} // arm_compute
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */
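A sketch of NEDeconvolutionLayer using the newly added enable_fast_math flag (F32, NCHW, 4x4 kernel, stride 2, pad 1). Shapes are illustrative; the output spatial size here follows (in - 1) * stride + kernel - 2 * pad.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 16U, 1U), 1, DataType::F32));   // [W, H, IFM, N]
        weights.allocator()->init(TensorInfo(TensorShape(4U, 4U, 16U, 8U), 1, DataType::F32)); // [kx, ky, IFM, OFM]
        bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U, 1U), 1, DataType::F32));    // (16-1)*2 + 4 - 2*1 = 32

        NEDeconvolutionLayer deconv;
        deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(2, 2, 1, 1),
                         /* enable_fast_math */ true);

        for (Tensor *t : {&src, &weights, &bias, &dst})
            t->allocator()->allocate();

        // Fill src, weights and bias here, then:
        deconv.run();
        return 0;
    }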
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index eb0724ae12..7bfdfbd13d 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEDEPTHCONVERT_H
#define ARM_COMPUTE_NEDEPTHCONVERT_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -84,7 +83,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
// Inherited methods overridden
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
index b9bdcd1f11..d27369670e 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,26 +21,27 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYER_H
-#define ARM_COMPUTE_NEDEPTHTOSPACELAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
class ITensorInfo;
+class NEDepthToSpaceLayerKernel;
/** Basic function to run @ref NEDepthToSpaceLayerKernel. */
-class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder
+class NEDepthToSpaceLayer : public IFunction
{
public:
/** Constructor */
- NEDepthToSpaceLayer() = default;
+ NEDepthToSpaceLayer();
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -50,7 +51,7 @@ public:
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete;
/** Default destructor */
- ~NEDepthToSpaceLayer() = default;
+ ~NEDepthToSpaceLayer();
/** Set the input and output tensors.
*
* Valid data layouts:
@@ -76,6 +77,11 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ void run() override;
+
+private:
+ std::unique_ptr<NEDepthToSpaceLayerKernel> _kernel;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H
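A minimal NEDepthToSpaceLayer sketch (NCHW F32, block size 2; shapes illustrative): the channel dimension shrinks by block_shape squared while width and height grow by block_shape.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 16U, 1U), 1, DataType::F32)); // [W, H, C, N]
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 4U, 1U), 1, DataType::F32));

        NEDepthToSpaceLayer d2s;
        d2s.configure(&src, &dst, 2); // block shape 2

        src.allocator()->allocate();
        dst.allocator()->allocate();
        d2s.run();
        return 0;
    }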
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 2f541758f4..6ad5aa7bfa 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -28,6 +28,7 @@
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+
#include <memory>
namespace arm_compute
@@ -80,8 +81,14 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
*
@@ -98,8 +105,14 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
@@ -112,7 +125,7 @@ private:
*
* -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
* -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
- * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
+ * -# @ref cpu::CpuDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
* -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
* -# @ref NEActivationLayer if fused activation is required
*
@@ -127,9 +140,11 @@ private:
/** Default move constructor */
NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
+ NEDepthwiseConvolutionLayerOptimizedInternal &
+ operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
/** Default move assignment operator */
- NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
+ NEDepthwiseConvolutionLayerOptimizedInternal &
+ operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
/** Default destructor */
~NEDepthwiseConvolutionLayerOptimizedInternal() = default;
/** Initialize the function's source, destination, kernels and border_size.
@@ -144,8 +159,14 @@ private:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
*
@@ -161,8 +182,14 @@ private:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
@@ -207,8 +234,14 @@ private:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
*
@@ -225,8 +258,14 @@ private:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
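(Illustrative usage sketch, not part of the patch.) The reflowed configure() keeps the same argument order; a hedged example of a stride-1, pad-1 depthwise convolution with a fused ReLU, assuming src, weights, bias and dst are Tensor objects already initialised with compatible shapes:

    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&src, &weights, &bias, &dst,
                  PadStrideInfo(1, 1, 1, 1),                                         // stride 1, pad 1
                  1,                                                                  // depth_multiplier
                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), // fused activation
                  Size2D(1U, 1U));                                                    // no dilation
    dwc.run();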
diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
index 2affa8d49e..7a94833d10 100644
--- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
@@ -24,13 +24,12 @@
#ifndef ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H
#define ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CPP/functions/CPPDetectionPostProcessLayer.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/runtime/Tensor.h"
#include <map>
@@ -78,8 +77,14 @@ public:
*
* @note Output contains all the detections. Of those, only the ones selected by the valid region are valid.
*/
- void configure(const ITensor *input_box_encoding, const ITensor *input_score, const ITensor *input_anchors,
- ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
+ void configure(const ITensor *input_box_encoding,
+ const ITensor *input_score,
+ const ITensor *input_anchors,
+ ITensor *output_boxes,
+ ITensor *output_classes,
+ ITensor *output_scores,
+ ITensor *num_detection,
+ DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDetectionPostProcessLayer
*
* @param[in] input_box_encoding The bounding box input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32.
@@ -93,8 +98,13 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input_box_encoding, const ITensorInfo *input_class_score, const ITensorInfo *input_anchors,
- ITensorInfo *output_boxes, ITensorInfo *output_classes, ITensorInfo *output_scores, ITensorInfo *num_detection,
+ static Status validate(const ITensorInfo *input_box_encoding,
+ const ITensorInfo *input_class_score,
+ const ITensorInfo *input_anchors,
+ ITensorInfo *output_boxes,
+ ITensorInfo *output_classes,
+ ITensorInfo *output_scores,
+ ITensorInfo *num_detection,
DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo());
// Inherited methods overridden:
void run() override;
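(Illustrative usage sketch, not part of the patch.) The reflowed configure() above maps straightforwardly onto a caller; a sketch assuming the seven tensors are already initialised and allocated with the shapes and data types documented above:

    NEDetectionPostProcessLayer postprocess;
    postprocess.configure(&box_encodings, &class_scores, &anchors, // inputs
                          &out_boxes, &out_classes, &out_scores,   // outputs
                          &num_detections,
                          DetectionPostProcessLayerInfo());        // default detection parameters
    postprocess.run();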
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 70352fdfaa..3ae3b2a15c 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -84,7 +85,12 @@ public:
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *bias,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer
*
* @note: DirectConvolution only works in the following configurations:
@@ -105,7 +111,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
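(Illustrative usage sketch, not part of the patch.) The usual validate()/configure() pairing with the reflowed signatures, assuming pre-initialised tensors:

    const PadStrideInfo conv_info(1, 1, 0, 0); // unit stride, no padding
    Status status = NEDirectConvolutionLayer::validate(src.info(), weights.info(), bias.info(),
                                                       dst.info(), conv_info);
    if(status.error_code() == ErrorCode::OK)
    {
        NEDirectConvolutionLayer conv;
        conv.configure(&src, &weights, &bias, &dst, conv_info,
                       ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); // fused activation
        conv.run();
    }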
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index 95274bdb0c..ebf2277d1f 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INEOperator.h"
@@ -72,7 +73,10 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -82,7 +86,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -132,7 +139,10 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -142,7 +152,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -192,7 +205,10 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference
*
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -202,7 +218,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -248,7 +267,10 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
@@ -258,7 +280,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -305,7 +330,10 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
@@ -315,7 +343,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
@@ -376,7 +407,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+ static Status
+ validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
// Inherited methods overridden:
void run() override;
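(Illustrative usage sketch, not part of the patch.) The reflowed configure()/validate() overloads above belong to the element-wise function classes declared in this header (NEElementwiseMax, NEElementwiseMin, NEElementwiseSquaredDiff, NEElementwiseDivision, NEElementwisePower and the comparison functions); a minimal sketch assuming a, b and out are compatible, pre-allocated tensors:

    NEElementwiseMax max_op;
    max_op.configure(&a, &b, &out); // act_info defaults to ActivationLayerInfo(), i.e. no fused activation
    max_op.run();

    NEElementwiseDivision div_op;   // F16/F32 only, per the validate() documentation above
    div_op.configure(&a, &b, &out);
    div_op.run();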
diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h
index 9654b1e604..99c6fd4eb4 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT1D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFT1D_H
#define ARM_COMPUTE_NEFFT1D_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h
index 57f38d1942..cefd3df17a 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT2D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFT2D_H
#define ARM_COMPUTE_NEFFT2D_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
#include "arm_compute/runtime/Tensor.h"
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
index c5f4d45b6b..84bfe6b02f 100644
--- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEFFT2D.h"
@@ -94,8 +93,13 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend.
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ void configure(ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEFFTConvolutionLayer
*
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
@@ -113,8 +117,13 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index e923ce33e1..1829c71fef 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -24,10 +24,9 @@
#ifndef ARM_COMPUTE_NEFILL_H
#define ARM_COMPUTE_NEFILL_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
index ab77c28839..44b1d4a62b 100644
--- a/arm_compute/runtime/NEON/functions/NEFillBorder.h
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
@@ -57,7 +58,10 @@ public:
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ void configure(ITensor *input,
+ unsigned int border_width,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value = PixelValue());
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index 4d47b068db..77ac484bab 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEFLOOR_H
#define ARM_COMPUTE_NEFLOOR_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 9727e108a5..885f8430cf 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,16 +24,15 @@
#ifndef ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
#define ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
-#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
namespace weights_transformations
@@ -77,10 +76,10 @@ private:
} // namespace weights_transformations
/** Basic function to compute a Fully Connected layer. This function calls the following kernels:
- * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref cpu::kernels::CpuIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref NETranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
- * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
+ * -# @ref NEGEMM or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel or @ref NEGEMMLowpOutputStage (if quantized asymmetric) (if @p biases is not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
@@ -88,7 +87,8 @@ class NEFullyConnectedLayer : public IFunction
{
public:
/** Constructor */
- NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete;
/** Prevent instances of this class from being moved (As this class contains pointers) */
@@ -113,66 +113,65 @@ public:
* |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional.
- * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
- * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
- * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
- * @param[in] fc_info (Optional) Fully connected layer additional info
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ * @param[in] weights_info (Optional) Stores necessary compute information when weights are already reshaped
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
- FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
+ const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
- * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
- * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
- * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
- * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
- * @param[in] fc_info (Optional) Fully connected layer additional info
+ * Similar to @ref NEFullyConnectedLayer::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ /** Static function that queries whether a fixed-format kernel exists for a given problem description
+ *
+ * @param[out] expected_weight_format Format in which the weights should be stored for the fixed-format kernel that was found
+ * @param[in] input Source tensor
+ * @param[in] weights Weights tensor.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor
+ * @param[in] fc_info Fully connected layer additional info
+ * @param[in] weights_info Describes weights shape
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const FullyConnectedLayerInfo &fc_info,
+ const WeightsInfo &weights_info);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
- void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
- void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act);
-
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEFlattenLayer _flatten;
- NEConvertFullyConnectedWeights _convert_weights;
- weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed;
- NETranspose _reshape_weights_function;
- weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- Tensor _flatten_output;
- Tensor _converted_weights_output;
- Tensor _reshape_weights_output;
- const ITensor *_original_weights;
- bool _are_weights_converted;
- bool _are_weights_reshaped;
- bool _is_fc_after_conv;
- bool _is_quantized_asymmetric;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H */
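(Illustrative usage sketch, not part of the patch.) The new weights_info argument and has_opt_impl() entry point can be exercised as below; tensor names are hypothetical and assumed to be initialised already:

    FullyConnectedLayerInfo fc_info;
    fc_info.transpose_weights = true; // transpose the weights inside the function (default behaviour)
    WeightsInfo weights_info;         // new optional argument introduced by this change

    // Query whether a fixed-format kernel exists and, if so, which weight format it expects.
    arm_compute::WeightFormat expected_wf;
    Status query = NEFullyConnectedLayer::has_opt_impl(expected_wf, src.info(), weights.info(),
                                                       bias.info(), dst.info(), fc_info, weights_info);

    NEFullyConnectedLayer fc;
    fc.configure(&src, &weights, &bias, &dst, fc_info, weights_info);
    fc.run();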
diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
index 3dd7f49044..f53b3de7f6 100644
--- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
+++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
@@ -75,9 +75,16 @@ public:
* @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
* @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to Convolution.
*/
- void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
- const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ void configure(const ITensor *input_weights,
+ const ITensor *bn_mean,
+ const ITensor *bn_var,
+ ITensor *fused_weights,
+ ITensor *fused_bias,
+ const ITensor *input_bias = nullptr,
+ const ITensor *bn_beta = nullptr,
+ const ITensor *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
/** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalization
*
* @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
@@ -95,10 +102,16 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ static Status validate(const ITensorInfo *input_weights,
+ const ITensorInfo *bn_mean,
+ const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights,
+ const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias = nullptr,
+ const ITensorInfo *bn_beta = nullptr,
+ const ITensorInfo *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 6fa30bd545..29650a5eca 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,44 +24,18 @@
#ifndef ARM_COMPUTE_NEGEMM_H
#define ARM_COMPUTE_NEGEMM_H
-#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/function_info/GEMMInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
-// Forward declarations
-class NEGEMMInterleave4x4Kernel;
-class NEGEMMMatrixAdditionKernel;
-class NEGEMMMatrixMultiplyKernel;
-class NEGEMMTranspose1xWKernel;
-namespace cpu
-{
-class CpuGemmAssemblyDispatch;
-}
-
/** Basic function to execute GEMM. This function calls the following kernels:
*
- * If optimized assembly is available:
- * -# @ref cpu::CpuGemmAssemblyDispatch
- * -# @ref NEActivationLayer (if alpha != 1.0)
- * Else:
- * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix)
- * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix)
- * -# @ref NEGEMMMatrixMultiplyKernel
- * In both cases:
- * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once)
- * Else:
- * -# @ref NEArithmeticAddition (if c != nullptr and is reshaped once and not optimized assembly in place)
- *
- * -# @ref NEActivationLayer (if activation is specified in GEMMInfo)
+ * -# @ref cpu::CpuGemm
*/
class NEGEMM : public IFunction
{
@@ -93,6 +67,8 @@ public:
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
* @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
+ * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around
+ *
* @param[in] a First input tensor (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
@@ -102,51 +78,49 @@ public:
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should happen only for the first run
*/
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(const ITensor *a,
+ const ITensor *b,
+ const ITensor *c,
+ ITensor *d,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMM.
*
- * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
- * @param[out] output Output tensor info. Data type supported: same as @p a
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of matrix C
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run
+ * Similar to @ref NEGEMM::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
+
+ /** Static function that queries whether a fixed-format kernel exists; if one exists, the first argument is set to the format in which
+ * the weights are expected to be reshaped, as defined by the WeightFormat class. Apart from the first argument, the rest of the arguments are the same
+ * as in @ref NEGEMM::validate(), except that all arguments are required.
+ *
+ * @return a status
+ */
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel;
- std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel;
- std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue;
- std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel;
- NEActivationLayer _alpha_scale_func;
- NEArithmeticAddition _add_bias;
- NEActivationLayer _activation_func;
-
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _tmp_d;
- const ITensor *_original_b;
- bool _run_vector_matrix_multiplication;
- bool _run_alpha_scale;
- bool _run_addition;
- bool _run_bias_addition;
- bool _run_activation;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
-
- ITensorPack _asm_glue_tensors{};
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMM_H */
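(Illustrative usage sketch, not part of the patch.) With the internals delegated to cpu::CpuGemm, the public interface is unchanged apart from the added has_opt_impl(); a minimal sketch assuming pre-initialised tensors a, b and d:

    GEMMInfo gemm_info; // default: no reshaping hints, no fused activation
    NEGEMM gemm;
    gemm.configure(&a, &b, nullptr, &d, 1.0f, 0.0f, gemm_info); // d = 1.0 * A * B, no C term
    gemm.run();

    // Query the fixed-format kernel database for the weight format an optimised kernel expects.
    arm_compute::WeightFormat expected_wf;
    Status s = NEGEMM::has_opt_impl(expected_wf, a.info(), b.info(), nullptr, d.info(),
                                    1.0f, 0.0f, gemm_info);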
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index f39ce4dfa3..d1c5a1c9b3 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -29,15 +29,12 @@
#include "arm_compute/runtime/IMemoryManager.h"
#include <memory>
+
namespace arm_compute
{
// Forward declarations
class ITensor;
class ITensorInfo;
-namespace cpu
-{
-class CpuGemmAssemblyDispatch;
-}
/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
*
@@ -89,7 +86,8 @@ public:
* Data types supported: Same as @p input.
* @param[in] info Convolution layer descriptor
*/
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info);
+ void
+ configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConv2d
*
* @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
@@ -105,7 +103,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const Conv2dInfo &info);
// Inherited methods overridden:
void run() override;
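(Illustrative usage sketch, not part of the patch.) A minimal validate()/configure() pairing for the reflowed signatures, assuming pre-initialised tensors; Conv2dInfo is the descriptor that bundles the padding/stride, dilation and activation settings for this function:

    Conv2dInfo conv2d_info; // default-constructed descriptor; set its fields (e.g. conv_info) as needed
    Status status = NEGEMMConv2d::validate(src.info(), weights.info(), bias.info(), dst.info(), conv2d_info);
    if(status.error_code() == ErrorCode::OK)
    {
        NEGEMMConv2d conv;
        conv.configure(&src, &weights, &bias, &dst, conv2d_info);
        conv.run();
    }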
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index e89eae1d31..3e84c3e2cf 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,147 +24,31 @@
#ifndef ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
class ITensor;
-class NECol2ImKernel;
-class NEIm2ColKernel;
-class NEWeightsReshapeKernel;
-
-/** Function to reshape the weights. This function calls the following kernel:
- * -# @ref NEWeightsReshapeKernel
- */
-class NEConvolutionLayerReshapeWeights : public IFunction
-{
-public:
- /** Constructor */
- NEConvolutionLayerReshapeWeights() noexcept;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = delete;
- /** Default destructor */
- ~NEConvolutionLayerReshapeWeights();
- /** Set the input and output tensors.
- *
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: All.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: same as @p weights.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output Destination tensor. Data types supported: same as @p weights.
- */
- void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
- *
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: All.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: same as @p weights.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[in] output Destination tensor. Data types supported: same as @p weights.
- *
- * @return an error status
- */
- static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel;
-};
-
-namespace weights_transformations
-{
-/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */
-class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
-{
-public:
- /** Constructor */
- NEConvolutionLayerReshapeWeightsTransform() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
- /** Default destructor */
- ~NEConvolutionLayerReshapeWeightsTransform() = default;
- void configure(const ITensor *input, const ITensor *biases)
- {
- _bias_bit = (biases != nullptr) ? 1 : 0;
- _func.configure(input, biases, &_output);
- }
-
- void run() override
- {
- _output.allocator()->allocate();
- _func.run();
- _reshape_run = true;
- }
-
- ITensor *get_weights() override
- {
- return &_output;
- }
-
- void release() override
- {
- _output.allocator()->free();
- }
-
- uint32_t uid() override
- {
- return ((0x8) | (_bias_bit << 7));
- }
-
- bool is_reshape_run()
- {
- return _reshape_run;
- }
-
-private:
- Tensor _output{};
- NEConvolutionLayerReshapeWeights _func{};
- int32_t _bias_bit{ 0 };
-};
-} // namespace weights_transformations
+class ITensorInfo;
/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
*
- * -# @ref NEIm2ColKernel
- * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
- * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
- * -# @ref NEArithmeticAddition (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
- * -# @ref NECol2ImKernel (if NCHW data layout)
+ * -# @ref cpu::CpuGemmConv2d
*
*/
class NEGEMMConvolutionLayer : public IFunction
{
public:
/** Constructor */
- NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
/** Prevent instances of this class from being moved (As this class contains non movable objects) */
@@ -192,118 +76,139 @@ public:
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+ * available, which may also reduce accuracy. Defaults to false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
- * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+ * available, which may also reduce accuracy. Defaults to false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- /** Configures the appropriate matrix multiply routine
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false,
+ unsigned int num_groups = 1);
+
+ /** Static function to check if there is an optimized version of
+ * GEMM available for the input parameters.
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
- */
- void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
+ * The method is intended to find the optimal memory layout to be
+ * used for the weights tensor when running variable weights
+ * execution.
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Output tensor info. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
- * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
+ * The user can query the database of optimised kernels in
+ * arm_gemm by specifying one of the enumerations of
+ * arm_compute::WeightFormat in the weight_format field of the input
+ * parameter weights_info. In case of success, the method
+ * writes the expected format in the output parameter
+ * expected_weight_format. The expected_weight_format can then be
+ * used in the configure method of the class for retrieving the
+ * optimal kernel.
*
- * @return a status
- */
- static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- int gemm_3d_depth = 1, bool skip_im2col = false);
- /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore
+ * Use case one - query for a specific format:
*
- * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth Depth of GEMM 3D
- * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout
+ *      WeightsInfo weights_info(..., arm_compute::WeightFormat::OHWIo4, ...); // Set the value of the input query.
+ *      arm_compute::WeightFormat expected_wf;
+ *      if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ...., weights_info, ...))
+ *      {
+ *        auto conv = std::make_unique<NEGEMMConvolutionLayer>();
+ *        conv->configure(..., weights_info, ...); // uses the same WeightFormat the user requested originally, OHWIo4.
+ *        conv->run();
+ *      }
*
- * @return a status
+ * Use case two - query for any format that would be optimal for the GEMM to execute:
+ *
+ *      WeightsInfo weights_info(..., arm_compute::WeightFormat::ANY, ...); // Set the value of the input query.
+ *      arm_compute::WeightFormat expected_wf;
+ *      if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ...., weights_info, ...))
+ *      {
+ *        auto conv = std::make_unique<NEGEMMConvolutionLayer>();
+ *        // ... code to convert the layout of the weights tensor to the layout returned by has_opt_impl
+ *        WeightsInfo new_weights_info(..., expected_wf, ...); // Set the value of the WeightFormat returned by has_opt_impl.
+ *        conv->configure(..., new_weights_info, ...);
+ *        conv->run();
+ *      }
+ *
+ * Notice that a GEMM configured with a WeightFormat other than
+ * UNSPECIFIED will run in variable weights mode.
+ *
+ * @param[out] expected_weight_format The arm_compute::WeightFormat expected by the kernel.
+ * @param[in] src Source tensor info.
+ * @param[in] weights Weights tensor info.
+ * @param[in] biases Biases tensor info. Shared biases supported.
+ * @param[in] dst Destination tensor info.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in]  weights_info     (Optional) Specifies additional configuration parameters for the weights of the GEMM computation.
+ * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported, as well as no activation (i.e. Linear), which is the default value.
+ * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
+ *                              available, which may introduce a drop in accuracy as well. Default is false
+ *
+ * @return a Status
*/
- static Status validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);
+ static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
+ const ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *dst,
+ const PadStrideInfo &conv_info,
+ const WeightsInfo &weights_info = WeightsInfo(),
+ const Size2D &dilation = Size2D(1U, 1U),
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEConvolutionLayerReshapeWeights _reshape_weights;
- weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
- std::unique_ptr<NEIm2ColKernel> _im2col_kernel;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- std::unique_ptr<NECol2ImKernel> _col2im_kernel;
- NEReshapeLayer _reshape_layer;
-
- const ITensor *_original_weights;
- const ITensor *_original_output;
-
- Tensor _im2col_output;
- Tensor _weights_reshaped;
- Tensor _gemm_output;
- Tensor _gemm_output_3d;
- Tensor _tmp_output;
-
- DataLayout _data_layout;
-
- bool _skip_im2col;
- bool _skip_col2im;
- bool _is_quantized;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H */
+#endif /* ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H */
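
To complement the in-comment examples above, the following is a minimal, self-contained sketch of the has_opt_impl() query flow. It is not library code: the tensor shapes describe an assumed NHWC 1x1 convolution, and the WeightsInfo constructor argument order (reshaped flag, kernel size, kernel count, retain flag, weight format) is an assumption about the current API.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"

    #include <iostream>

    int main()
    {
        using namespace arm_compute;

        // Illustrative NHWC shapes for a 1x1 convolution: src/dst are [C, W, H, N], weights are [IFM, kw, kh, OFM].
        TensorInfo src(TensorShape(16U, 32U, 32U, 1U), 1, DataType::F32);
        TensorInfo weights(TensorShape(16U, 1U, 1U, 64U), 1, DataType::F32);
        TensorInfo biases(TensorShape(64U), 1, DataType::F32);
        TensorInfo dst(TensorShape(64U, 32U, 32U, 1U), 1, DataType::F32);
        src.set_data_layout(DataLayout::NHWC);
        weights.set_data_layout(DataLayout::NHWC);
        dst.set_data_layout(DataLayout::NHWC);

        // Ask for *any* optimal fixed-format kernel (WeightsInfo constructor arguments are assumptions).
        const WeightsInfo query(false, 1U, 1U, 64U, false, WeightFormat::ANY);
        WeightFormat      expected_wf = WeightFormat::UNSPECIFIED;

        const Status status = NEGEMMConvolutionLayer::has_opt_impl(expected_wf, &src, &weights, &biases, &dst,
                                                                   PadStrideInfo(1, 1, 0, 0), query);
        if (bool(status))
        {
            // expected_wf now names the layout the weights must be rearranged into before calling
            // configure() with a WeightsInfo that carries this format (variable weights mode).
            std::cout << "Fixed-format kernel available\n";
        }
        else
        {
            std::cout << "No fixed-format kernel: " << status.error_description() << "\n";
        }
        return 0;
    }

As the second use case in the documentation notes, when querying with WeightFormat::ANY the caller remains responsible for rewriting the weights into expected_wf before calling configure().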
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index dc9783f9eb..6d07675d3d 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,53 +21,34 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "NEActivationLayer.h"
-#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/GEMMInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
class ITensor;
-class NEConvertQuantizedSignednessKernel;
-class NEGEMMInterleave4x4Kernel;
-class NEGEMMLowpMatrixMultiplyKernel;
-class NEGEMMLowpOffsetContributionKernel;
-class NEGEMMLowpOffsetContributionOutputStageKernel;
-class NEGEMMLowpMatrixAReductionKernel;
-class NEGEMMLowpMatrixBReductionKernel;
-class NEGEMMTranspose1xWKernel;
-namespace cpu
-{
-class CpuGemmAssemblyDispatch;
-}
+class ITensorInfo;
-/** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available:
- *
- * -# @ref NEGEMMInterleave4x4Kernel
- * -# @ref NEGEMMTranspose1xWKernel
- * -# @ref NEGEMMLowpMatrixMultiplyKernel
- * -# @ref NEGEMMLowpOffsetContributionKernel
- * -# @ref NEActivationLayer
+/** Function to run GEMM on quantized types.
*
- * otherwise if the DOT product instruction is available:
+ * This function calls the following:
*
- * -# @ref NEGEMMLowpOffsetContributionKernel
- *
-*/
+ * -# @ref cpu::CpuGemmLowpMatrixMultiplyCore
+ */
class NEGEMMLowpMatrixMultiplyCore : public IFunction
{
public:
/** Constructor */
- NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move constructor */
@@ -99,6 +80,7 @@ public:
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |
* |QASYMM8_SIGNED |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
*
* @note GEMM_LOWP: low precision GEMM kernel
* This kernel performs the following computations:
@@ -107,71 +89,36 @@ public:
* -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
* -# Compute the matrix product of the resulting a * b in int32.
*
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
+ * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED/F32 otherwise
*
* @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
* @param[in] b Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[out] output Output tensor. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
+ * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32/F32
+ * @param[out] output    Output tensor. Data type supported: S32/QASYMM8/QASYMM8_SIGNED/F32
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should be executed only for the first run
*/
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(
+ const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
*
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[in] output Output tensor info. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should be executed only for the first run
+ * Similar to @ref NEGEMMLowpMatrixMultiplyCore::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ const GEMMInfo &gemm_info = GEMMInfo());
// Inherited methods overridden
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue;
- std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
- std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
- std::unique_ptr<NEGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
- std::unique_ptr<NEGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
- std::unique_ptr<NEGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
- NEActivationLayer _activation_func;
- std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_to_signed_asymm;
- std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_from_signed_asymm;
-
- Tensor _vector_sum_col;
- Tensor _vector_sum_row;
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _mm_result_s32;
- Tensor _signed_a;
- Tensor _signed_output;
- const ITensor *_original_b;
- int32_t _a_offset;
- int32_t _b_offset;
-
- bool _run_vector_matrix_multiplication;
- bool _assembly_path;
- bool _fused_assembly_path;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _fuse_output_stage;
- bool _run_activation;
- bool _flip_signedness;
-
- ITensorPack _asm_glue_tensors{};
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
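
As a rough usage sketch of the interface above (shapes and quantization parameters are invented for illustration, and only the default GEMMInfo is used, so the accumulator result stays in S32):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
    #include "arm_compute/runtime/Tensor.h"

    int main()
    {
        using namespace arm_compute;

        // A is M x K, B is K x N, dst is M x N; ACL shapes are given as (width, height).
        Tensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(32U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 10)));
        b.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.02f, 5)));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::S32));

        NEGEMMLowpMatrixMultiplyCore gemmlowp;
        // Default GEMMInfo: no fused output stage, so the destination remains S32.
        gemmlowp.configure(&a, &b, nullptr, &dst);

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill a and b with quantized data ...
        gemmlowp.run();
        return 0;
    }

The S32 result would then typically be requantized with NEGEMMLowpOutputStage, whose header is updated next.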
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index fa5f5e3826..0d932bb4af 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -25,7 +25,7 @@
#define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
/** This file contains all available output stages for GEMMLowp.
*
@@ -39,237 +39,17 @@ namespace arm_compute
{
class ITensor;
class ITensorInfo;
-
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint.
- *
- * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
- *
- * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
- *
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete;
- /** Default destructor */
- ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint();
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint.
- *
- * NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
- *
- * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
- *
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete;
- /** Default destructor */
- ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint();
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
- *
- * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint.
- *
- * NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
- *
- * result_fixedpoint_multiplier, result_shift
- *
- * The final result is:
- *
- * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
- *
- * In case the bias tensor is provided, the final result is:
- *
- * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
- *
- * This function calls the following kernels:
- *
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
- * after the result is shifted right by result_shift
-*/
-class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete;
- /** Default destructor */
- ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint();
- /** Initialise the kernel's inputs, output
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16
- * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
- int max = std::numeric_limits<int32_t>::max());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- *
- * @param[in] input Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
-};
-
/** Basic function to execute GEMMLowpQuantizeDown kernels.
*
- * This function calls the following kernels:
+ * This function calls the following operators:
*
- * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
+ * -# @ref cpu::CpuGemmLowpOutputStage
*/
-class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder
+class NEGEMMLowpOutputStage : public IFunction
{
public:
/** Constructor */
- NEGEMMLowpOutputStage() = default;
+ NEGEMMLowpOutputStage();
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -309,7 +89,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const GEMMLowpOutputStageInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H */
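
A brief sketch of driving the remaining NEGEMMLowpOutputStage after the rework above. The GEMMLowpOutputStageInfo field names used here (type, gemmlowp_multiplier, gemmlowp_shift, gemmlowp_offset, the bounds and output_data_type) reflect this editor's understanding of the struct, and the requantization values are placeholders rather than numbers derived from a real quantization scheme.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
    #include "arm_compute/runtime/Tensor.h"

    int main()
    {
        using namespace arm_compute;

        Tensor acc, bias, dst;
        acc.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::S32));
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.1f, 128)));

        GEMMLowpOutputStageInfo info{};
        info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
        info.gemmlowp_multiplier = 1073741824; // placeholder fixed-point multiplier
        info.gemmlowp_shift      = 8;          // placeholder right shift
        info.gemmlowp_offset     = 128;        // destination zero point
        info.gemmlowp_min_bound  = 0;
        info.gemmlowp_max_bound  = 255;
        info.output_data_type    = DataType::QASYMM8;

        NEGEMMLowpOutputStage output_stage;
        output_stage.configure(&acc, &bias, &dst, info);

        acc.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();
        output_stage.run();
        return 0;
    }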
diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h
index 393a38ee4d..9c7ae0134d 100644
--- a/arm_compute/runtime/NEON/functions/NEGather.h
+++ b/arm_compute/runtime/NEON/functions/NEGather.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,18 +49,17 @@ public:
* |All |All |
*
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis])
+ * @param[in]  indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following types: U32/S32. Each value must be in the range [0, input.shape[@p axis]), otherwise the result will become unpredictable.
+ * @note The "axis" must be in the range [0, input.rank - 1] when indices is a vector, and must be 1 when indices is a 2D or 3D tensor.
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ *
*/
void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis])
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ * Similar to @ref NEGather::configure()
*
* @return a status
*/
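
A small usage sketch of the configure() contract described above; all shapes and the axis value are arbitrary examples.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGather.h"
    #include "arm_compute/runtime/Tensor.h"

    int main()
    {
        using namespace arm_compute;

        Tensor input, indices, output;
        // 2D input of shape (width = 8, height = 4); gather 3 rows along axis 1 with a 1D U32 index vector.
        input.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        indices.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::U32));
        output.allocator()->init(TensorInfo(TensorShape(8U, 3U), 1, DataType::F32));

        NEGather gather;
        gather.configure(&input, &indices, &output, /* axis */ 1);

        input.allocator()->allocate();
        indices.allocator()->allocate();
        output.allocator()->allocate();

        // ... fill input, and fill indices with values in [0, 4) ...
        gather.run();
        return 0;
    }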
diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
index 3b683382ec..0f294fde22 100644
--- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
@@ -95,7 +95,12 @@ public:
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct.
* @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid.
*/
- void configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
+ void configure(const ITensor *scores,
+ const ITensor *deltas,
+ const ITensor *anchors,
+ ITensor *proposals,
+ ITensor *scores_out,
+ ITensor *num_valid_proposals,
const GenerateProposalsInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer
@@ -112,7 +117,11 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
+ static Status validate(const ITensorInfo *scores,
+ const ITensorInfo *deltas,
+ const ITensorInfo *anchors,
+ const ITensorInfo *proposals,
+ const ITensorInfo *scores_out,
const ITensorInfo *num_valid_proposals,
const GenerateProposalsInfo &info);
diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
index bb0697072b..0bc57be09e 100644
--- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
@@ -89,7 +89,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ float gamma = 1.0f,
+ float beta = 0.0f,
+ float epsilon = 1e-12f);
// Inherited methods overridden:
void run() override;
@@ -103,5 +107,5 @@ private:
Tensor _permuted_input;
Tensor _permuted_output;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
index 7f1a5e785e..8502cee5d2 100644
--- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
@@ -97,5 +97,5 @@ private:
std::unique_ptr<NEL2NormalizeLayerKernel> _normalize_kernel;
Tensor _sumsq;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index 075fb4530a..629c5d10a0 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -25,7 +25,8 @@
#define ARM_COMPUTE_NELSTMLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
@@ -35,7 +36,6 @@
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
namespace arm_compute
{
@@ -104,13 +104,26 @@ public:
* @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
* If set to 0.0 then clipping is disabled.
*/
- void configure(const ITensor *input,
- const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- const ITensor *output_state_in, const ITensor *cell_state_in,
- ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output,
- const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+ void configure(const ITensor *input,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ const ITensor *output_state_in,
+ const ITensor *cell_state_in,
+ ITensor *scratch_buffer,
+ ITensor *output_state_out,
+ ITensor *cell_state_out,
+ ITensor *output,
+ const LSTMParams<ITensor> &lstm_params,
+ const ActivationLayerInfo &activation_info,
+ float cell_threshold = 0.f,
+ float projection_threshold = 0.f);
/** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
*
@@ -151,13 +164,26 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
- const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
- const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *scratch_buffer,
+ const ITensorInfo *output_state_out,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output,
+ const LSTMParams<ITensorInfo> &lstm_params,
+ const ActivationLayerInfo &activation_info,
+ float cell_threshold = 0.f,
+ float projection_threshold = 0.f);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
index 2f0c753691..ae951669b3 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
@@ -38,8 +39,6 @@
#include "arm_compute/runtime/NEON/functions/NESlice.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
-
namespace arm_compute
{
// Forward declarations
@@ -50,7 +49,7 @@ class ITensor;
* This function calls the following functions/kernels:
*
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
+ * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16
* -# @ref NETranspose Matrix transpose
* -# @ref NEConcatenateLayer Tensor concatenation
* -# @ref NEActivationLayer Activation functions (tanh and logistic)
@@ -104,11 +103,22 @@ public:
* @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input.
*/
void configure(const ITensor *input,
- const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- ITensor *cell_state_in, const ITensor *output_state_in,
- ITensor *cell_state_out, ITensor *output_state_out);
+ const ITensor *input_to_input_weights,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_input_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *input_gate_bias,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ ITensor *cell_state_in,
+ const ITensor *output_state_in,
+ ITensor *cell_state_out,
+ ITensor *output_state_out);
/** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
*
@@ -133,11 +143,22 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
- const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out);
+ const ITensorInfo *input_to_input_weights,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_input_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *input_gate_bias,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output_state_out);
// Inherited methods overridden:
void run() override;
@@ -147,30 +168,30 @@ private:
MemoryGroup _memory_group;
// Functions used
- NEGEMMLowpMatrixMultiplyCore _gemmlowp;
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage;
- NETranspose _transpose_weights;
- NEConcatenateLayer _concat_input_weights;
- NEConcatenateLayer _concat_recurrent_weights;
- NEConcatenateLayer _concat_weights;
- NEConcatenateLayer _concat_inputs;
- NEConcatenateLayer _concat_bias;
- NEActivationLayer _sigmoid_forget_gate;
- NEActivationLayer _sigmoid_input_gate;
- NEActivationLayer _sigmoid_output_gate;
- NEActivationLayer _tanh_modulation_gate;
- NEActivationLayer _tanh_output_state;
- NEArithmeticAddition _add1;
- NEArithmeticAddition _add2;
- NEPixelWiseMultiplication _mul1;
- NEPixelWiseMultiplication _mul2;
- NEPixelWiseMultiplication _mul3;
- NESlice _slice_input_tensor;
- NESlice _slice_forget_tensor;
- NESlice _slice_cell_tensor;
- NESlice _slice_output_tensor;
- NEDequantizationLayer _dequantize;
- NEQuantizationLayer _quantize;
+ NEGEMMLowpMatrixMultiplyCore _gemmlowp;
+ NEGEMMLowpOutputStage _output_stage;
+ NETranspose _transpose_weights;
+ NEConcatenateLayer _concat_input_weights;
+ NEConcatenateLayer _concat_recurrent_weights;
+ NEConcatenateLayer _concat_weights;
+ NEConcatenateLayer _concat_inputs;
+ NEConcatenateLayer _concat_bias;
+ NEActivationLayer _sigmoid_forget_gate;
+ NEActivationLayer _sigmoid_input_gate;
+ NEActivationLayer _sigmoid_output_gate;
+ NEActivationLayer _tanh_modulation_gate;
+ NEActivationLayer _tanh_output_state;
+ NEArithmeticAddition _add1;
+ NEArithmeticAddition _add2;
+ NEPixelWiseMultiplication _mul1;
+ NEPixelWiseMultiplication _mul2;
+ NEPixelWiseMultiplication _mul3;
+ NESlice _slice_input_tensor;
+ NESlice _slice_forget_tensor;
+ NESlice _slice_cell_tensor;
+ NESlice _slice_output_tensor;
+ NEDequantizationLayer _dequantize;
+ NEQuantizationLayer _quantize;
// Tensor pointers
const ITensor *_input_to_input_weights;
diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h
new file mode 100644
index 0000000000..58dd7a6f6b
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMatMul.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Settings for MatMul CPU implementation */
+class CpuMatMulSettings
+{
+public:
+ // get fast math flag
+ bool fast_math() const
+ {
+ return _fast_math;
+ }
+ // get fixed format flag
+ bool fixed_format() const
+ {
+ return _fixed_format;
+ }
+ // Set fast math flag
+ CpuMatMulSettings &fast_math(bool fmath)
+ {
+ _fast_math = fmath;
+ return *this;
+ }
+ // Set fixed format flag
+ CpuMatMulSettings &fixed_format(bool fixed_format)
+ {
+ _fixed_format = fixed_format;
+ return *this;
+ }
+
+private:
+ bool _fast_math{false};
+ bool _fixed_format{false};
+};
+
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+class MatMulInfo;
+class Status;
+
+/** Basic function to run the following operators:
+ *
+ * -# @ref cpu::CpuMatMul
+ */
+class NEMatMul : public IFunction
+{
+public:
+ /** Constructor */
+ NEMatMul();
+ /** Destructor */
+ ~NEMatMul();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMatMul(const NEMatMul &) = delete;
+ /** Default move constructor */
+ NEMatMul(NEMatMul &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMatMul &operator=(const NEMatMul &) = delete;
+ /** Default move assignment operator */
+ NEMatMul &operator=(NEMatMul &&) = default;
+ /** Initialize
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * Valid data type configurations:
+ * |lhs |rhs |dst |
+ * |:--------------|:------------------|:--------------|
+ * |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ *
+ * @param[in]  lhs      Left-hand side tensor. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+ * @param[in]  rhs      Right-hand side tensor. Data types supported: same as @p lhs.
+ * @param[out] dst Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+ * @param[in] info Contains MatMul operation information described in @ref MatMulInfo.
+ * @param[in]  settings Contains flags for function level settings, e.g. fast math
+ * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
+ */
+ void configure(ITensor *lhs,
+ ITensor *rhs,
+ ITensor *dst,
+ const MatMulInfo &info,
+ const CpuMatMulSettings &settings,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEMatMul
+ *
+ * @param[in] lhs Left-hand side tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+ * @param[in] rhs Right-hand side tensor info. Data types supported: same as @p lhs.
+ * @param[in]  dst      Output tensor info to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+ * @param[in] info Contains MatMul operation information described in @ref MatMulInfo.
+ * @param[in]  settings Contains flags for function level settings, e.g. fast math
+ * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
+ *
+ * @return Status
+ */
+ static Status validate(const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const ITensorInfo *dst,
+ const MatMulInfo &info,
+ const CpuMatMulSettings &settings,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H
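
To close the new file, here is a rough usage sketch. The shapes are arbitrary; the default-constructed MatMulInfo (assumed to mean no transposed operands) and the include path for its header are assumptions, and fast_math(true) merely allows the backend to pick a lower-precision kernel if one exists.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/function_info/MatMulInfo.h" // assumed location of MatMulInfo
    #include "arm_compute/runtime/NEON/functions/NEMatMul.h"
    #include "arm_compute/runtime/Tensor.h"

    int main()
    {
        using namespace arm_compute;

        // LHS: M x K = 8 x 32, RHS: K x N = 32 x 16, DST: M x N = 8 x 16 (shapes given as (width, height)).
        Tensor lhs, rhs, dst;
        lhs.allocator()->init(TensorInfo(TensorShape(32U, 8U), 1, DataType::F32));
        rhs.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::F32));

        const MatMulInfo        matmul_info{}; // defaults assumed: no adjoint/transposed operands
        const CpuMatMulSettings settings = CpuMatMulSettings().fast_math(true);

        if (bool(NEMatMul::validate(lhs.info(), rhs.info(), dst.info(), matmul_info, settings)))
        {
            NEMatMul matmul;
            matmul.configure(&lhs, &rhs, &dst, matmul_info, settings);

            lhs.allocator()->allocate();
            rhs.allocator()->allocate();
            dst.allocator()->allocate();
            matmul.run();
        }
        return 0;
    }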
diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
index 41ea040457..e00fc4544f 100644
--- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
@@ -33,12 +34,10 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
class NEFill;
-class NEMaxUnpoolingLayerKernel;
/** Function to perform MaxUnpooling. This function calls the following kernels:
*
* -# @ref NEFill
- * -# @ref NEMaxUnpoolingLayerKernel
*/
class NEMaxUnpoolingLayer : public IFunction
{
@@ -88,14 +87,18 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *indices,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run() override;
private:
- std::unique_ptr<NEFill> _fill_func;
- std::unique_ptr<NEMaxUnpoolingLayerKernel> _unpooling_layer_kernel;
+ std::unique_ptr<NEFill> _fill_func;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index fbe000445c..27e3fa674e 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NENORMALIZATIONLAYER_H
#define ARM_COMPUTE_NENORMALIZATIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
@@ -88,16 +87,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group; /**< Function memory group */
- std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */
- NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */
- Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */
+ NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */
+ Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index 4aa6725496..494b1c0641 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -24,14 +24,14 @@
#ifndef ARM_COMPUTE_NEPADLAYER_H
#define ARM_COMPUTE_NEPADLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/runtime/SubTensor.h"
-
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
+
#include <memory>
namespace arm_compute
@@ -82,7 +82,11 @@ public:
* @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
* or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
*/
- void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+ void configure(ITensor *input,
+ ITensor *output,
+ const PaddingList &padding,
+ const PixelValue constant_value = PixelValue(),
+ const PaddingMode mode = PaddingMode::CONSTANT);
/** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer.
*
* @param[in] input Source tensor info. Data types supported: All.
@@ -95,7 +99,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &padding,
+ const PixelValue constant_value = PixelValue(),
+ const PaddingMode mode = PaddingMode::CONSTANT);
// Inherited methods overridden:
void run() override;
@@ -109,7 +117,10 @@ private:
* specifies the front and the end padding in the i-th dimension.
* @param[in] constant_value Constant value to be used for the padding
*/
- void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value);
+ void configure_constant_mode(ITensor *input,
+ ITensor *output,
+ const PaddingList &padding,
+ const PixelValue constant_value);
/** Configure functions for when reflect or symmetric padding is used.
*
* @param[in] input Source tensor. Data types supported: All.
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index c863fde0ac..2cef64764d 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NEPERMUTE_H
#define ARM_COMPUTE_NEPERMUTE_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 4684c2d4b8..3d81bf6087 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,9 @@
#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
+#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -93,7 +95,12 @@ public:
* @param[in] rounding_policy Rounding policy.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
+ void configure(const ITensor *input1,
+ const ITensor *input2,
+ ITensor *output,
+ float scale,
+ ConvertPolicy overflow_policy,
+ RoundingPolicy rounding_policy,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication
*
@@ -120,7 +127,12 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy,
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ float scale,
+ ConvertPolicy overflow_policy,
+ RoundingPolicy rounding_policy,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
@@ -156,7 +168,10 @@ public:
* @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(ITensor *input1,
+ ITensor *input2,
+ ITensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplication
*
* @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
@@ -164,7 +179,10 @@ public:
* @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run() override;
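Note: a minimal sketch of the scale/policy overload of NEPixelWiseMultiplication::configure reformatted above; the F32 shapes and the particular policy combination are illustrative assumptions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void pixelwise_mul_example()
    {
        const TensorShape shape(128U, 128U);
        Tensor a, b, out;
        a.allocator()->init(TensorInfo(shape, 1, DataType::F32));
        b.allocator()->init(TensorInfo(shape, 1, DataType::F32));
        out.allocator()->init(TensorInfo(shape, 1, DataType::F32));

        NEPixelWiseMultiplication mul;
        // out = a * b * scale; the overflow and rounding policies mainly matter for integer data types.
        mul.configure(&a, &b, &out, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        mul.run();
    }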
diff --git a/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
new file mode 100644
index 0000000000..09251f2a5f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEPOOLING3DLAYER_H
+#define ARM_COMPUTE_NEPOOLING3DLAYER_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+class IMemoryManager;
+/** Basic function to simulate a pooling 3d layer with the specified pooling operation. This function calls the following kernels:
+ *
+ * -# @ref cpu::CpuPool3d
+ */
+class NEPooling3dLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NEPooling3dLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPooling3dLayer(const NEPooling3dLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPooling3dLayer &operator=(const NEPooling3dLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPooling3dLayer(NEPooling3dLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPooling3dLayer &operator=(NEPooling3dLayer &&) = delete;
+ /** Default destructor */
+ ~NEPooling3dLayer();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NDHWC
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ *
+ * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise
+ *
+ * @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor.
+ * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
+ */
+ void configure(const ITensor *input, ITensor *output, const Pooling3dLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPooling3dLayer
+ *
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info.
+ * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Pooling3dLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEPOOLING3DLAYER_H */
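Note: to illustrate the new NEPooling3dLayer interface a short NDHWC sketch follows; the shapes and the member-by-member initialisation of Pooling3dLayerInfo are assumptions made for illustration.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void pooling_3d_example()
    {
        // NDHWC shapes are written innermost-first: (C, W, H, D, N).
        TensorInfo src_info(TensorShape(16U, 8U, 8U, 8U, 1U), 1, DataType::F32);
        TensorInfo dst_info(TensorShape(16U, 4U, 4U, 4U, 1U), 1, DataType::F32);
        src_info.set_data_layout(DataLayout::NDHWC);
        dst_info.set_data_layout(DataLayout::NDHWC);

        Tensor src, dst;
        src.allocator()->init(src_info);
        dst.allocator()->init(dst_info);

        // 2x2x2 MAX pooling with stride 2 halves each spatial dimension.
        Pooling3dLayerInfo pool_info;
        pool_info.pool_type = PoolingType::MAX;
        pool_info.pool_size = Size3D(2, 2, 2);
        pool_info.stride    = Size3D(2, 2, 2);

        NEPooling3dLayer pool3d;
        pool3d.configure(&src, &dst, pool_info);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        pool3d.run();
    }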
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index 9398e1fce9..768ad0d818 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -71,6 +71,8 @@ public:
* |F32 |F32 |
*
* @note F16 is supported for pool sizes 2 and 3 only
+ * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise
+ * Cases where the pooling region is completely outside the input tensor are only supported for floating-point data types
*
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
@@ -89,7 +91,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info,
+ const ITensorInfo *indices = nullptr);
// Inherited methods overridden:
void run() override;
@@ -98,5 +103,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
index 38e0c9f3ad..858e3299af 100644
--- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
@@ -62,7 +62,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const PriorBoxLayerInfo &info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEPRIORBOXLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index 7c2e9bc5a1..009a4e0911 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,15 +25,17 @@
#define ARM_COMPUTE_NEQLSTMLAYER_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/common/LSTMParams.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/runtime/common/LSTMParams.h"
#include <memory>
@@ -43,8 +45,13 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
class NEQLSTMLayerNormalizationKernel;
-class NEGEMMLowpMatrixAReductionKernel;
-
+namespace cpu
+{
+namespace kernels
+{
+class CpuGemmLowpMatrixAReductionKernel;
+} // namespace kernels
+} // namespace cpu
/** Basic function to run @ref NEQLSTMLayer
*
* This function calls the following kernels:
@@ -54,8 +61,8 @@ class NEGEMMLowpMatrixAReductionKernel;
* -# @ref NEArithmeticSubtraction Elementwise subtraction
* -# @ref NECopy Copy kernel for copying output_state_out to output
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
- * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
- * -# @ref NEGEMMLowpMatrixAReductionKernel For precomputing effective biases to use
+ * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16
+ * -# @ref cpu::kernels::CpuGemmLowpMatrixAReductionKernel For precomputing effective biases to use
* -# @ref NEPixelWiseMultiplication Elementwise multiplication
* -# @ref NETranspose Transpose function for reshaping the weights
* */
@@ -123,12 +130,21 @@ public:
* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
*/
- void configure(const ITensor *input,
- const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
- const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
- const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
- const ITensor *cell_state_in, ITensor *output_state_in,
- ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
+ void configure(const ITensor *input,
+ const ITensor *input_to_forget_weights,
+ const ITensor *input_to_cell_weights,
+ const ITensor *input_to_output_weights,
+ const ITensor *recurrent_to_forget_weights,
+ const ITensor *recurrent_to_cell_weights,
+ const ITensor *recurrent_to_output_weights,
+ const ITensor *forget_gate_bias,
+ const ITensor *cell_bias,
+ const ITensor *output_gate_bias,
+ const ITensor *cell_state_in,
+ ITensor *output_state_in,
+ ITensor *cell_state_out,
+ ITensor *output_state_out,
+ ITensor *output,
const LSTMParams<ITensor> &lstm_params);
/** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer
@@ -173,12 +189,21 @@ public:
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
* @return a status
*/
- static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
- const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output_state_out,
+ const ITensorInfo *output,
const LSTMParams<ITensorInfo> &lstm_params);
// Inherited methods overridden:
@@ -211,10 +236,17 @@ private:
* @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor.
*
*/
- void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
- const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias, Tensor *mm_res,
- Tensor *outstage_res, float gemmlowp_scale,
- const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
+ void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm,
+ NEGEMMLowpOutputStage &outstage,
+ GEMMLowpOutputStageInfo &gemmlowp_info,
+ const ITensor *mm_input,
+ const ITensor *mm_weights,
+ const ITensor *bias,
+ Tensor *mm_res,
+ Tensor *outstage_res,
+ float gemmlowp_scale,
+ const TensorInfo &mm_res_info,
+ const TensorInfo &outstage_tensor_info);
MemoryGroup _memory_group;
@@ -223,8 +255,8 @@ private:
{
static constexpr uint32_t max_dimension_supported = 2;
- ITensor *_src{ nullptr };
- ITensor *_dst{ nullptr };
+ ITensor *_src{nullptr};
+ ITensor *_dst{nullptr};
size_t _row_size{};
Window _window{};
@@ -250,70 +282,73 @@ private:
};
// Functions used
- NETranspose _transpose_input_to_forget_weights;
- NETranspose _transpose_input_to_cell_weights;
- NETranspose _transpose_input_to_output_weights;
- NETranspose _transpose_input_to_input_weights;
- NETranspose _transpose_recurrent_to_forget_weights;
- NETranspose _transpose_recurrent_to_cell_weights;
- NETranspose _transpose_recurrent_to_output_weights;
- NETranspose _transpose_recurrent_to_input_weights;
- NETranspose _transpose_projection_weights;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction;
- NEArithmeticAddition _projection_bias_add;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
- NEGEMMLowpOutputStage _input_to_forget_outstage;
- NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
- NEGEMMLowpOutputStage _cell_to_forget_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_forget;
- NEArithmeticAddition _accumulate_cell_forget;
- NEActivationLayer _forget_gate_sigmoid;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
- NEGEMMLowpOutputStage _input_to_cell_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
- NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_modulation;
- NEActivationLayer _cell_gate_tanh;
- NEArithmeticSubtraction _input_gate_sub;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
- NEGEMMLowpOutputStage _input_to_input_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
- NEGEMMLowpOutputStage _recurrent_to_input_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_input;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
- NEGEMMLowpOutputStage _cell_to_input_outstage;
- NEArithmeticAddition _accumulate_cell_input;
- NEActivationLayer _input_gate_sigmoid;
- NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
- NEPixelWiseMultiplication _pixelwise_mul_input_cell;
- NEArithmeticAddition _add_forget_cell;
- NEActivationLayer _cell_clip;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
- NEGEMMLowpOutputStage _input_to_output_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
- NEGEMMLowpOutputStage _recurrent_to_output_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_output;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
- NEGEMMLowpOutputStage _cell_to_output_outstage;
- NEArithmeticAddition _accumulate_cell_to_output;
- NEActivationLayer _output_gate_sigmoid;
- NEActivationLayer _hidden_tanh;
- NEPixelWiseMultiplication _pixelwise_mul_hidden;
- NEGEMMLowpOutputStage _hidden_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_projection;
- NEGEMMLowpOutputStage _projection_outstage;
- NEArithmeticAddition _accumulate_projection;
- NEActivationLayer _projection_clip;
+
+ NEDequantizationLayer _dequantize_input_to_forget_weights;
+ NEQuantizationLayer _quantize_input_to_forget_weights;
+ NETranspose _transpose_input_to_forget_weights;
+ NETranspose _transpose_input_to_cell_weights;
+ NETranspose _transpose_input_to_output_weights;
+ NETranspose _transpose_input_to_input_weights;
+ NETranspose _transpose_recurrent_to_forget_weights;
+ NETranspose _transpose_recurrent_to_cell_weights;
+ NETranspose _transpose_recurrent_to_output_weights;
+ NETranspose _transpose_recurrent_to_input_weights;
+ NETranspose _transpose_projection_weights;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _projection_reduction;
+ NEArithmeticAddition _projection_bias_add;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
+ NEGEMMLowpOutputStage _input_to_forget_outstage;
+ NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
+ NEGEMMLowpOutputStage _cell_to_forget_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_forget;
+ NEArithmeticAddition _accumulate_cell_forget;
+ NEActivationLayer _forget_gate_sigmoid;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
+ NEGEMMLowpOutputStage _input_to_cell_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
+ NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_modulation;
+ NEActivationLayer _cell_gate_tanh;
+ NEArithmeticSubtraction _input_gate_sub;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
+ NEGEMMLowpOutputStage _input_to_input_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
+ NEGEMMLowpOutputStage _recurrent_to_input_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_input;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
+ NEGEMMLowpOutputStage _cell_to_input_outstage;
+ NEArithmeticAddition _accumulate_cell_input;
+ NEActivationLayer _input_gate_sigmoid;
+ NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
+ NEPixelWiseMultiplication _pixelwise_mul_input_cell;
+ NEArithmeticAddition _add_forget_cell;
+ NEActivationLayer _cell_clip;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
+ NEGEMMLowpOutputStage _input_to_output_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
+ NEGEMMLowpOutputStage _recurrent_to_output_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_output;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
+ NEGEMMLowpOutputStage _cell_to_output_outstage;
+ NEArithmeticAddition _accumulate_cell_to_output;
+ NEActivationLayer _output_gate_sigmoid;
+ NEActivationLayer _hidden_tanh;
+ NEPixelWiseMultiplication _pixelwise_mul_hidden;
+ NEGEMMLowpOutputStage _hidden_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_projection;
+ NEGEMMLowpOutputStage _projection_outstage;
+ NEArithmeticAddition _accumulate_projection;
+ NEActivationLayer _projection_clip;
TensorCopyKernel _projection_bias_copy;
TensorCopyKernel _projection_output_to_accumulate_copy;
@@ -325,19 +360,16 @@ private:
NECopy _copy_output;
// Tensor pointers
- const ITensor *_input_to_input_weights
- {
- nullptr
- };
- const ITensor *_recurrent_to_input_weights{ nullptr };
- const ITensor *_projection_bias{ nullptr };
- const ITensor *_input_to_forget_weights{ nullptr };
- const ITensor *_input_to_cell_weights{ nullptr };
- const ITensor *_input_to_output_weights{ nullptr };
- const ITensor *_recurrent_to_forget_weights{ nullptr };
- const ITensor *_recurrent_to_cell_weights{ nullptr };
- const ITensor *_recurrent_to_output_weights{ nullptr };
- const ITensor *_projection_weights{ nullptr };
+ const ITensor *_input_to_input_weights{nullptr};
+ const ITensor *_recurrent_to_input_weights{nullptr};
+ const ITensor *_projection_bias{nullptr};
+ const ITensor *_input_to_forget_weights{nullptr};
+ const ITensor *_input_to_cell_weights{nullptr};
+ const ITensor *_input_to_output_weights{nullptr};
+ const ITensor *_recurrent_to_forget_weights{nullptr};
+ const ITensor *_recurrent_to_cell_weights{nullptr};
+ const ITensor *_recurrent_to_output_weights{nullptr};
+ const ITensor *_projection_weights{nullptr};
std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{};
std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{};
@@ -372,63 +404,66 @@ private:
return _layer_norms[getGateIndex(g)];
}
- void configure_layer_norm(LayerNormGate g, const ITensor *in);
+ void configure_layer_norm(LayerNormGate g, const ITensor *in);
static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
- Tensor _input_to_forget_weights_transposed{ nullptr };
- Tensor _input_to_cell_weights_transposed{ nullptr };
- Tensor _input_to_output_weights_transposed{ nullptr };
- Tensor _input_to_input_weights_transposed{ nullptr };
- Tensor _recurrent_to_forget_weights_transposed{ nullptr };
- Tensor _recurrent_to_cell_weights_transposed{ nullptr };
- Tensor _recurrent_to_output_weights_transposed{ nullptr };
- Tensor _recurrent_to_input_weights_transposed{ nullptr };
- Tensor _projection_weights_transposed{ nullptr };
- Tensor _input_to_input_eff_bias{ nullptr };
- Tensor _recurrent_to_input_eff_bias{ nullptr };
- Tensor _input_to_forget_eff_bias{ nullptr };
- Tensor _recurrent_to_forget_eff_bias{ nullptr };
- Tensor _input_to_cell_eff_bias{ nullptr };
- Tensor _recurrent_to_cell_eff_bias{ nullptr };
- Tensor _input_to_output_eff_bias{ nullptr };
- Tensor _recurrent_to_output_eff_bias{ nullptr };
- Tensor _projection_reduction_res{ nullptr };
- Tensor _projection_eff_bias{ nullptr };
- Tensor _mm_input_to_forget_res{ nullptr };
- Tensor _mm_recurrent_to_forget_res{ nullptr };
- Tensor _mul_cell_to_forget_res{ nullptr };
- Tensor _input_to_forget_outstage_res{ nullptr };
- Tensor _cell_to_forget_outstage_res{ nullptr };
- Tensor _recurrent_to_forget_outstage_res{ nullptr };
- Tensor _forget_gate{ nullptr };
- Tensor _mm_input_to_cell_res{ nullptr };
- Tensor _input_to_cell_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_cell_res{ nullptr };
- Tensor _recurrent_to_cell_outstage_res{ nullptr };
- Tensor _cell_gate{ nullptr };
- Tensor _mul_input_cell_res{ nullptr };
- Tensor _mm_input_to_input_res{ nullptr };
- Tensor _input_to_input_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_input_res{ nullptr };
- Tensor _mul_cell_to_input_res{ nullptr };
- Tensor _cell_to_input_outstage_res{ nullptr };
- Tensor _recurrent_to_input_outstage_res{ nullptr };
- Tensor _input_gate{ nullptr };
- Tensor _mm_input_to_output_res{ nullptr };
- Tensor _input_to_output_outstage_res{ nullptr };
- Tensor _mm_recurrent_to_output_res{ nullptr };
- Tensor _mul_cell_to_output_res{ nullptr };
- Tensor _cell_to_output_outstage_res{ nullptr };
- Tensor _recurrent_to_output_outstage_res{ nullptr };
- Tensor _output_gate{ nullptr };
- Tensor _hidden_mul_res{ nullptr };
- Tensor _hidden_gate{ nullptr };
- Tensor _mm_projection_res{ nullptr };
- Tensor _projection_outstage_res{ nullptr };
- Tensor _projection_out_res{ nullptr };
- Tensor _projection_accumulate_res{ nullptr };
- Tensor _ones{ nullptr };
+ Tensor _input_to_forget_weights_f32{nullptr};
+ Tensor _input_to_forget_weights_symm8{nullptr};
+
+ Tensor _input_to_forget_weights_transposed{nullptr};
+ Tensor _input_to_cell_weights_transposed{nullptr};
+ Tensor _input_to_output_weights_transposed{nullptr};
+ Tensor _input_to_input_weights_transposed{nullptr};
+ Tensor _recurrent_to_forget_weights_transposed{nullptr};
+ Tensor _recurrent_to_cell_weights_transposed{nullptr};
+ Tensor _recurrent_to_output_weights_transposed{nullptr};
+ Tensor _recurrent_to_input_weights_transposed{nullptr};
+ Tensor _projection_weights_transposed{nullptr};
+ Tensor _input_to_input_eff_bias{nullptr};
+ Tensor _recurrent_to_input_eff_bias{nullptr};
+ Tensor _input_to_forget_eff_bias{nullptr};
+ Tensor _recurrent_to_forget_eff_bias{nullptr};
+ Tensor _input_to_cell_eff_bias{nullptr};
+ Tensor _recurrent_to_cell_eff_bias{nullptr};
+ Tensor _input_to_output_eff_bias{nullptr};
+ Tensor _recurrent_to_output_eff_bias{nullptr};
+ Tensor _projection_reduction_res{nullptr};
+ Tensor _projection_eff_bias{nullptr};
+ Tensor _mm_input_to_forget_res{nullptr};
+ Tensor _mm_recurrent_to_forget_res{nullptr};
+ Tensor _mul_cell_to_forget_res{nullptr};
+ Tensor _input_to_forget_outstage_res{nullptr};
+ Tensor _cell_to_forget_outstage_res{nullptr};
+ Tensor _recurrent_to_forget_outstage_res{nullptr};
+ Tensor _forget_gate{nullptr};
+ Tensor _mm_input_to_cell_res{nullptr};
+ Tensor _input_to_cell_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_cell_res{nullptr};
+ Tensor _recurrent_to_cell_outstage_res{nullptr};
+ Tensor _cell_gate{nullptr};
+ Tensor _mul_input_cell_res{nullptr};
+ Tensor _mm_input_to_input_res{nullptr};
+ Tensor _input_to_input_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_input_res{nullptr};
+ Tensor _mul_cell_to_input_res{nullptr};
+ Tensor _cell_to_input_outstage_res{nullptr};
+ Tensor _recurrent_to_input_outstage_res{nullptr};
+ Tensor _input_gate{nullptr};
+ Tensor _mm_input_to_output_res{nullptr};
+ Tensor _input_to_output_outstage_res{nullptr};
+ Tensor _mm_recurrent_to_output_res{nullptr};
+ Tensor _mul_cell_to_output_res{nullptr};
+ Tensor _cell_to_output_outstage_res{nullptr};
+ Tensor _recurrent_to_output_outstage_res{nullptr};
+ Tensor _output_gate{nullptr};
+ Tensor _hidden_mul_res{nullptr};
+ Tensor _hidden_gate{nullptr};
+ Tensor _mm_projection_res{nullptr};
+ Tensor _projection_outstage_res{nullptr};
+ Tensor _projection_out_res{nullptr};
+ Tensor _projection_accumulate_res{nullptr};
+ Tensor _ones{nullptr};
std::array<Tensor, _layer_norm_count> _layer_norm_output{};
inline Tensor &get_layer_norm_output(LayerNormGate g)
@@ -436,14 +471,15 @@ private:
return _layer_norm_output[getGateIndex(g)];
}
- bool _is_prepared{ false };
- bool _has_cifg{ false };
- bool _has_cell_clipping{ false };
- bool _has_projection{ false };
- bool _has_projection_clipping{ false };
- bool _has_peephole{ false };
- bool _has_layer_norm{ false };
- bool _projection_tensor_copy_required{ false };
+ bool _is_prepared{false};
+ bool _has_cifg{false};
+ bool _has_cell_clipping{false};
+ bool _has_projection{false};
+ bool _has_projection_clipping{false};
+ bool _has_peephole{false};
+ bool _has_layer_norm{false};
+ bool _projection_tensor_copy_required{false};
+ bool _convert_input_to_forget_weights_to_qsymm8{false};
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEQLSTMLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index 667d3144ac..af7f464ac9 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -72,7 +72,13 @@ public:
* @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input
* @param[in] info Activation layer parameter.
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights, const ITensor *bias, ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *recurrent_weights,
+ const ITensor *bias,
+ ITensor *hidden_state,
+ ITensor *output,
+ ActivationLayerInfo &info);
/** Initialize the function
*
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
@@ -85,7 +91,12 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state, const ITensorInfo *output,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *recurrent_weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *hidden_state,
+ const ITensorInfo *output,
const ActivationLayerInfo &info);
// Inherited methods overridden:
diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
index ea1af4daea..b06ebe899d 100644
--- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
@@ -77,7 +77,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEROIALIGNLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
index 2992b3eb95..929111ad4b 100644
--- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/IArray.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
@@ -73,7 +74,8 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- void configure(const ITensor *input, const ITensor *rois, const ITensor *output, const ROIPoolingLayerInfo &pool_info);
+ void
+ configure(const ITensor *input, const ITensor *rois, const ITensor *output, const ROIPoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run() override;
@@ -91,7 +93,10 @@ public:
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
* @return a Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, const ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ const ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
private:
std::unique_ptr<NEROIPoolingLayerKernel> _roi_kernel;
diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h
index cb14c8fdde..609456a4ef 100644
--- a/arm_compute/runtime/NEON/functions/NERange.h
+++ b/arm_compute/runtime/NEON/functions/NERange.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+
#include <memory>
namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h
index 7512115a3f..5b8d8cdf2b 100644
--- a/arm_compute/runtime/NEON/functions/NEReduceMean.h
+++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,12 +24,9 @@
#ifndef ARM_COMPUTE_NEON_REDUCE_MEAN_H
#define ARM_COMPUTE_NEON_REDUCE_MEAN_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
@@ -83,7 +80,8 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output);
+ static Status
+ validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output);
// Inherited methods overridden:
void run() override;
@@ -93,13 +91,8 @@ private:
std::vector<NEReductionOperation> _reduction_kernels;
std::vector<Tensor> _reduced_outs;
NEReshapeLayer _reshape;
- NEDequantizationLayer _dequant;
- NEQuantizationLayer _requant;
int _reduction_ops;
bool _keep_dims;
- bool _do_requant;
- Tensor _input_no_quant;
- Tensor _output_no_quant;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEON_REDUCE_MEAN_H */
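Note: below is a minimal sketch of NEReduceMean now that the internal dequantize/requantize path has been removed; the rank-3 F32 shapes and the choice of reduction axes are illustrative.

    #include "arm_compute/core/Coordinates.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reduce_mean_example()
    {
        // Mean over dimensions 1 and 2 of a rank-3 tensor, keeping the reduced dimensions as size 1.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 8U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 1U, 1U), 1, DataType::F32));

        NEReduceMean reduce_mean;
        reduce_mean.configure(&src, Coordinates(1, 2), /* keep_dims = */ true, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        reduce_mean.run();
    }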
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index 533c10adcf..f5391a6d0e 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -25,9 +25,9 @@
#define ARM_COMPUTE_NEREDUCTIONOPERATION_H
#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
+
#include <memory>
namespace arm_compute
@@ -88,7 +88,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ unsigned int axis,
+ ReductionOperation op,
+ bool keep_dims = true);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h
deleted file mode 100644
index 271ac9739b..0000000000
--- a/arm_compute/runtime/NEON/functions/NERemap.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREMAP_H
-#define ARM_COMPUTE_NEREMAP_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute remap. This function calls the following kernels:
- *
- * -# @ref NERemapKernel
- */
-class NERemap : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the function's sources, destination, interpolation policy and border mode.
- *
- * Valid data layouts:
- * - All
- *
- * Valid data type configurations:
- * |src0 |src1 |src2 |dst |
- * |:------|:------|:------|:------|
- * |U8 |F32 |F32 |U 8 |
- *
- * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] map_x Map for X coordinates. Data type supported: F32.
- * @param[in] map_y Map for Y coordinates. Data type supported: F32.
- * @param[out] output Output tensor. Data type supported: U8.
- * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
- * @param[in] border_mode Border mode to use on the input tensor.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0.
- *
- */
- void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output,
- InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NEREMAP_H */
diff --git a/arm_compute/runtime/NEON/functions/NEReorderLayer.h b/arm_compute/runtime/NEON/functions/NEReorderLayer.h
new file mode 100644
index 0000000000..e3fa7b9c16
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEReorderLayer.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(__aarch64__)
+
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+class NEReorderKernel;
+/** Function to compute blocked reorder. */
+class NEReorderLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ NEReorderLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorderLayer(const NEReorderLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorderLayer &operator=(const NEReorderLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReorderLayer(NEReorderLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReorderLayer &operator=(NEReorderLayer &&) = delete;
+ /** Default destructor */
+ ~NEReorderLayer();
+ /** Set the input and output tensors.
+ *
+ * Valid data layouts:
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ *
+ * @param[in] input Source tensor. Data type supported: F32. Data layouts supported: NCHW.
+ * @param[out] output    Destination with the same dimensions, data type and data layout as @p input,
+ *                       except for the last dimension of the data layout, which needs to be a multiple of the blocking parameter ksize
+ * @param[in] input_wf WeightFormat of input.
+ * @param[in] output_wf WeightFormat of output.
+ */
+ void configure(const ITensor *input,
+ ITensor *output,
+ arm_compute::WeightFormat input_wf,
+ arm_compute::WeightFormat output_wf);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReorderLayer
+ *
+ * Similar to @ref NEReorderLayer::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ arm_compute::WeightFormat input_wf,
+ arm_compute::WeightFormat output_wf);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::unique_ptr<NEReorderKernel> _reorder_kernel; /**< Reorder layer kernel */
+};
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER */
+
+#endif // defined(__aarch64__)
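Note: a sketch of how the new NEReorderLayer might be driven follows. The 64x128 F32 shape and the OHWI to OHWIo4 format pair are illustrative assumptions; the dimensions are chosen as multiples of the blocking factor so that input and output shapes coincide.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEReorderLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #if defined(__aarch64__) // NEReorderLayer is only built on aarch64
    using namespace arm_compute;

    void reorder_example()
    {
        // Re-block an F32 weight tensor from the plain OHWI format into the 4-way blocked OHWIo4 format.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 128U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 128U), 1, DataType::F32));

        ARM_COMPUTE_ERROR_THROW_ON(
            NEReorderLayer::validate(src.info(), dst.info(), WeightFormat::OHWI, WeightFormat::OHWIo4));

        NEReorderLayer reorder;
        reorder.configure(&src, &dst, WeightFormat::OHWI, WeightFormat::OHWIo4);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        reorder.run();
    }
    #endif // __aarch64__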
diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h
index c02fff54a5..e03e415068 100644
--- a/arm_compute/runtime/NEON/functions/NEReverse.h
+++ b/arm_compute/runtime/NEON/functions/NEReverse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,12 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEREVERSE_H
-#define ARM_COMPUTE_NEREVERSE_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -45,22 +44,33 @@ public:
* Valid data type configurations:
* |src0 |src1 |dst |
* |:--------------|:--------------|:--------------|
- * |All |U32 |All |
+ * |All |U32, S32 |All |
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
+ * @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
+ *
+ * @note The value of each axis should be between [-rank, rank)
+ * @note If there are duplicate values in the tensor, the subsequent axis values are ignored. e.g. an array of [2, 2] has the same effect as [2].
+ *
+ * @deprecated Support for U32 in axis tensor will be removed in 24.02 release
*
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
*/
- void configure(const ITensor *input, ITensor *output, const ITensor *axis);
+ void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel
*
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
+ * @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *axis,
+ const bool use_inverted_axis = false);
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEREVERSE_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H
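Note: a short sketch of the updated NEReverse interface, using an S32 axis tensor in line with the deprecation note above; the shapes and axis values are illustrative assumptions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEReverse.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reverse_example()
    {
        // Reverse a rank-3 tensor along its first two dimensions.
        Tensor src, dst, axis;
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));
        axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::S32));

        NEReverse reverse;
        reverse.configure(&src, &dst, &axis, /* use_inverted_axis = */ false);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        axis.allocator()->allocate();

        // The axis values (here 0 and 1) must lie in [-rank, rank) as noted above.
        auto *axis_data = reinterpret_cast<int32_t *>(axis.buffer());
        axis_data[0] = 0;
        axis_data[1] = 1;

        reverse.run();
    }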
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index 0b7dddacb2..72dfa3bda4 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,10 +24,9 @@
#ifndef ARM_COMPUTE_NESCALEIMAGE_H
#define ARM_COMPUTE_NESCALEIMAGE_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -66,16 +65,19 @@ public:
* |F16 |F16 |
* |F32 |F32 |
* |U8 |U8 |
+ * |S8 |S8 |
* |S16 |S16 |
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for configuration
+ *
+ * @note Using S8 data type only supports NHWC, @p border_mode Replicate, and @p policy Bilinear
*/
void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEScale
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for validation
*
diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h
index ac79a5c633..70a688d3b0 100644
--- a/arm_compute/runtime/NEON/functions/NESlice.h
+++ b/arm_compute/runtime/NEON/functions/NESlice.h
@@ -85,7 +85,8 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
// Inherited methods overridden:
void run() override;
@@ -129,7 +130,8 @@ public:
*
* @return A status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
};
} // namespace experimental
} // namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index ad8c1467d0..5dee61a4a8 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -24,9 +24,9 @@
#ifndef ARM_COMPUTE_NESPACETOBATCHLAYER_H
#define ARM_COMPUTE_NESPACETOBATCHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
#include <memory>
namespace arm_compute
@@ -82,7 +82,12 @@ public:
* @param[in] padding_right The padding at the end of every dimension of the output tensor.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
+ void configure(const ITensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
@@ -92,7 +97,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *block_shape,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings)
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
@@ -104,7 +112,12 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ const ITensorInfo *output);
// Inherited methods overridden:
void run() override;
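A minimal sketch of the static block-shape overloads whose signatures are reflowed above. The values are hypothetical: a 4x4 single-channel F32 input, a 2x2 block shape and no padding, so the spatial dimensions halve and the batch dimension grows by a factor of 4.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical NCHW shapes: (W, H, C, N) = (4, 4, 1, 1) -> (2, 2, 1, 4)
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 1U, 4U), 1, DataType::F32));

        NESpaceToBatchLayer s2b;
        // Static block-shape/padding overload, mirroring the reflowed validate()
        if(!NESpaceToBatchLayer::validate(src.info(), 2, 2, Size2D(0, 0), Size2D(0, 0), dst.info()))
        {
            return 1;
        }
        s2b.configure(&src, 2, 2, Size2D(0, 0), Size2D(0, 0), &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...
        s2b.run();
        return 0;
    }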
diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h
index 206f299c06..36358a7094 100644
--- a/arm_compute/runtime/NEON/functions/NESplit.h
+++ b/arm_compute/runtime/NEON/functions/NESplit.h
@@ -26,7 +26,6 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
-
#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NESlice.h"
diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h
index ae4e468f21..98dacde0c1 100644
--- a/arm_compute/runtime/NEON/functions/NEStackLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NESTACKLAYER_H
-#define ARM_COMPUTE_NESTACKLAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
@@ -91,9 +91,8 @@ public:
void run() override;
private:
- std::vector<ITensor *> _input;
- std::vector<std::unique_ptr<NEStackLayerKernel>> _stack_kernels;
- unsigned int _num_inputs;
+ std::unique_ptr<NEStackLayerKernel> _stack_kernel;
+ bool _is_prepared;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESTACKLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
index 4b14d946f6..fa1113ffec 100644
--- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h
+++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
@@ -71,9 +71,14 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const ITensor *input, ITensor *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ void configure(const ITensor *input,
+ ITensor *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice
*
@@ -89,9 +94,14 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
// Inherited methods overridden:
void run() override;
@@ -121,9 +131,14 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const ITensorInfo *input, ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ void configure(const ITensorInfo *input,
+ ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice
*
@@ -139,9 +154,14 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
};
} // namespace experimental
} // namespace arm_compute
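A minimal sketch of the reflowed runtime configure()/validate() overloads, with hypothetical shapes, coordinates and strides; the optional begin/end/shrink-axis masks are left at their default of 0, so starts and ends are taken literally and no dimension is removed.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical 8x8 F32 input: every second element along x in [0, 8)
        // and elements [2, 6) along y, giving a 4x4 output
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));

        const Coordinates starts(0, 2);
        const Coordinates ends(8, 6);
        const BiStrides   strides(2, 1);

        NEStridedSlice slice;
        if(!NEStridedSlice::validate(src.info(), dst.info(), starts, ends, strides))
        {
            return 1;
        }
        slice.configure(&src, &dst, starts, ends, strides);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...
        slice.run();
        return 0;
    }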
diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h
index 915e5aa1da..001a0a4128 100644
--- a/arm_compute/runtime/NEON/functions/NETile.h
+++ b/arm_compute/runtime/NEON/functions/NETile.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NETILE_H
#define ARM_COMPUTE_NETILE_H
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 581fe74309..5d2d1f1b01 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_NETRANSPOSE_H
#define ARM_COMPUTE_NETRANSPOSE_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -83,4 +82,4 @@ private:
std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NETRANSPOSE_H */
\ No newline at end of file
+#endif /* ARM_COMPUTE_NETRANSPOSE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h
index 079fee5b9e..e1af96d08d 100644
--- a/arm_compute/runtime/NEON/functions/NEUnstack.h
+++ b/arm_compute/runtime/NEON/functions/NEUnstack.h
@@ -26,7 +26,6 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index f9ebf608cb..6caa2aeb59 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,17 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-
-#include "arm_compute/runtime/IFunction.h"
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -40,13 +35,11 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
-class ICPPKernel;
/** Basic function to simulate a convolution layer. This function calls the following kernels:
*
- * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
- * -# @ref NEWinogradLayerTransformInputKernel
- * -# @ref NEWinogradLayerTransformOutputKernel
+ * -# @ref cpu::CpuWinogradConv2dTransformInputKernel
+ * -# @ref cpu::CpuWinogradConv2dTransformOutputKernel
* -# @ref cpu::CpuGemmAssemblyDispatch
* -# @ref CPPPermute (three times: weights, input and output)
*
@@ -57,12 +50,16 @@ class NEWinogradConvolutionLayer : public IFunction
public:
/** Constructor */
NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = delete;
- /** Default destructor */
- ~NEWinogradConvolutionLayer() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = default;
+ /** Destructor */
+ ~NEWinogradConvolutionLayer();
/** Set the input and output tensors.
*
@@ -80,7 +77,8 @@ public:
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
+ * Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for Fp32
+ * -> 3x3 for Fp16
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -89,63 +87,35 @@ public:
 * @param[in]  enable_fast_math (Optional) Enable fast math computation. When this flag is set, the function may dispatch the fastest implementation
 *                              available, which can introduce a drop in accuracy. Default is false
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- bool enable_fast_math = false);
+ void configure(const ITensor *input,
+ const ITensor *weights,
+ const ITensor *biases,
+ ITensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
void prepare() override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
+ /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradConvolutionLayer
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
- * available which may introduce a drop of accuracy as well. Default is false
+ * Similar to @ref NEWinogradConvolutionLayer::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_function;
- std::unique_ptr<ICPPKernel> _transform_input_kernel;
- std::unique_ptr<ICPPKernel> _transform_output_kernel;
- std::unique_ptr<ICPPKernel> _transform_weights_kernel;
- NEActivationLayer _activationlayer_function;
-
- CPPPermute _permute_input;
- CPPPermute _permute_weights;
- CPPPermute _permute_output;
- Tensor _input_transformed;
- Tensor _output_transformed;
- Tensor _input_workspace;
- Tensor _output_workspace;
- Tensor _kernel_storage;
- Tensor _input_nhwc;
- Tensor _output_nhwc;
- Tensor _weights_hwio;
- const ITensor *_input;
- const ITensor *_weights;
- ITensor *_output;
- bool _is_prepared;
- bool _is_activationlayer_enabled;
- DataLayout _data_layout;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
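A minimal sketch of the updated validate()/configure() pair, using a hypothetical F32 3x3 convolution with unit strides (one of the kernel sizes listed above); the shapes, padding and fused ReLU are illustrative only.

    #include "arm_compute/core/Error.h" // Status
    #include "arm_compute/core/Types.h"
    #include "arm_compute/function_info/ActivationLayerInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical NCHW shapes: 56x56x64 input, 64 filters of 3x3, stride 1, pad 1
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(56U, 56U, 64U, 1U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 64U, 64U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(56U, 56U, 64U, 1U), 1, DataType::F32));

        const PadStrideInfo       conv_info(1, 1, 1, 1); // Winograd supports only unit strides
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);

        // validate() mirrors configure(), so unsupported setups can be rejected up front
        const Status st = NEWinogradConvolutionLayer::validate(src.info(), weights.info(), biases.info(),
                                                               dst.info(), conv_info, act_info);
        if(!st)
        {
            return 1;
        }

        NEWinogradConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, conv_info, act_info);

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src, weights, biases ...
        conv.run(); // weight transformation is prepared on the first run()
        return 0;
    }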