aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2020-06-23 17:25:43 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2020-06-25 13:14:06 +0000
commit 173ba9bbb19ea83f951318d9989e440768b4de8f (patch)
tree 840a28e1cc4d0adf47097c8ab27092531c8e0958 /arm_compute
parent 0f954eb6c8bf2f6c8600c56f21fec6aa9ebf082e (diff)
download ComputeLibrary-173ba9bbb19ea83f951318d9989e440768b4de8f.tar.gz
COMPMID-3373: Async support to NEArithmetic* kernels/functions (Pt. 1)
Added support on NEArithmeticAddition and NEArithmeticSubtraction

Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: Ifa805f8455ef6eff1ee627752dc1c7fe9740ec47
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3451
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h 17
-rw-r--r-- arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h 11
-rw-r--r-- arm_compute/runtime/NEON/functions/NEArithmeticAddition.h 72
-rw-r--r-- arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h 65
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMM.h 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NELSTMLayer.h 18
-rw-r--r-- arm_compute/runtime/NEON/functions/NEQLSTMLayer.h 38
-rw-r--r-- arm_compute/runtime/NEON/functions/NERNNLayer.h 22
8 files changed, 186 insertions, 61 deletions
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
index bff34dfda2..f254027e0e 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
@@ -68,12 +68,12 @@ public:
* - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
* - (QSYMM16,QSYMM16) -> QSYMM16
*
- * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
* @param[in] policy Overflow policy.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel
*
* @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
@@ -86,7 +86,7 @@ public:
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
// Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override;
private:
/** Common signature for all the specialised add functions
@@ -99,11 +99,8 @@ private:
*/
using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window);
/** Add function to use for the particular tensor types passed to configure() */
- AddFunction *_func;
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
- ConvertPolicy _policy;
+ AddFunction *_func;
+ ConvertPolicy _policy;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
index f75c6bfb98..dfd08d9b06 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -71,7 +71,7 @@ public:
* @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
* @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized.
*/
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel
*
* @note Convert policy cannot be WRAP if datatype is QASYMM8
@@ -86,7 +86,7 @@ public:
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
// Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override;
private:
/** Common signature for all the specialised sub functions
@@ -99,11 +99,8 @@ private:
*/
using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat);
/** Sub function to use for the particular tensor types passed to configure() */
- SubFunction *_func;
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
- ConvertPolicy _policy;
+ SubFunction *_func;
+ ConvertPolicy _policy;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 2bf12df4df..589e0624eb 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -25,14 +25,17 @@
#define ARM_COMPUTE_NEARITHMETICADDITION_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
namespace arm_compute
{
class ITensor;
+namespace experimental
+{
/** Basic function to run @ref NEArithmeticAdditionKernel */
-class NEArithmeticAddition : public INESimpleFunctionNoBorder
+class NEArithmeticAddition : public INEOperator
{
public:
/** Initialise the kernel's inputs, output and conversion policy.
@@ -51,13 +54,69 @@ public:
* - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
* - (QSYMM16,QSYMM16) -> QSYMM16
*
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] policy Policy to use to handle overflow.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+ */
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] policy Policy to use to handle overflow
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ MemoryRequirements workspace() const override;
+};
+} // namespace experimental
+
+/** Basic function to run @ref NEArithmeticAdditionKernel */
+class NEArithmeticAddition : public IFunction
+{
+public:
+ /** Default Constructor */
+ NEArithmeticAddition();
+ /** Default Destructor */
+ ~NEArithmeticAddition();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAddition(const NEArithmeticAddition &) = delete;
+ /** Default move constructor */
+ NEArithmeticAddition(NEArithmeticAddition &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete;
+ /** Default move assignment operator */
+ NEArithmeticAddition &operator=(NEArithmeticAddition &&);
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
*
* @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
@@ -69,6 +128,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEARITHMETICADDITION_H */
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index 31d1698aea..0bab911c1a 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -25,12 +25,52 @@
#define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
namespace arm_compute
{
class ITensor;
+namespace experimental
+{
+/** Basic function to run @ref NEArithmeticSubtractionKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
+ * @note The function performs an arithmetic subtraction between two tensors.
+ *
+ * This function calls the following kernels:
+ * -# @ref NEArithmeticSubtractionKernel
+ */
+class NEArithmeticSubtraction : public INEOperator
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+ */
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ MemoryRequirements workspace() const override;
+};
+} // namespace experimental
+
/** Basic function to run @ref NEArithmeticSubtractionKernel
*
* @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/F32.
@@ -39,9 +79,21 @@ class ITensor;
* This function calls the following kernels:
* -# @ref NEArithmeticSubtractionKernel
*/
-class NEArithmeticSubtraction : public INESimpleFunction
+class NEArithmeticSubtraction : public IFunction
{
public:
+ /** Default Constructor */
+ NEArithmeticSubtraction();
+ /** Default Destructor */
+ ~NEArithmeticSubtraction();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticSubtraction(const NEArithmeticSubtraction &) = delete;
+ /** Default move constructor */
+ NEArithmeticSubtraction(NEArithmeticSubtraction &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticSubtraction &operator=(const NEArithmeticSubtraction &) = delete;
+ /** Default move assignment operator */
+ NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
@@ -50,7 +102,7 @@ public:
* @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/
- void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
*
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
@@ -62,6 +114,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTION_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 8dc6b88bb0..b89a373c47 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_NEGEMM_H
#define ARM_COMPUTE_NEGEMM_H
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
@@ -35,6 +34,7 @@
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/Tensor.h"
@@ -112,7 +112,7 @@ private:
NEGEMMAssemblyDispatch _asm_glue;
NEGEMMMatrixAdditionKernel _ma_kernel;
NEActivationLayer _alpha_scale_func;
- NEArithmeticAdditionKernel _add_bias_kernel;
+ NEArithmeticAddition _add_bias;
NEActivationLayer _activation_func;
Tensor _tmp_a;
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index 64845115b8..b9b581c484 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -25,13 +25,13 @@
#define ARM_COMPUTE_NELSTMLAYER_H
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
@@ -149,7 +149,7 @@ private:
MemoryGroup _memory_group;
NEFullyConnectedLayer _fully_connected_input_gate;
NEArithmeticAddition _accum_input_gate1;
- NEArithmeticSubtractionKernel _subtract_input_gate;
+ NEArithmeticSubtraction _subtract_input_gate;
NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate;
NEActivationLayer _activation_input_gate;
NEFullyConnectedLayer _fully_connected_forget_gate;
@@ -159,8 +159,8 @@ private:
NEFullyConnectedLayer _fully_connected_cell_state;
NEGEMM _gemm_cell_state1;
NETransposeKernel _transpose_cell_state;
- NEArithmeticAdditionKernel _accum_cell_state1;
- NEArithmeticAdditionKernel _accum_cell_state2;
+ NEArithmeticAddition _accum_cell_state1;
+ NEArithmeticAddition _accum_cell_state2;
NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1;
NEActivationLayer _activation_cell_state;
NEActivationLayer _cell_clip;
@@ -182,16 +182,16 @@ private:
NEConcatenateLayer _concat_weights_output;
NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff;
- NEArithmeticAdditionKernel _accum_input_gate_bias;
+ NEArithmeticAddition _accum_input_gate_bias;
NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate_coeff;
- NEArithmeticAdditionKernel _accum_forget_gate_bias;
+ NEArithmeticAddition _accum_forget_gate_bias;
NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_gate_coeff;
- NEArithmeticAdditionKernel _accum_cell_gate_bias;
+ NEArithmeticAddition _accum_cell_gate_bias;
NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
NEPixelWiseMultiplicationKernel _pixelwise_mul_output_gate_coeff;
- NEArithmeticAdditionKernel _accum_output_gate_bias;
+ NEArithmeticAddition _accum_output_gate_bias;
Tensor _input_gate_out1;
Tensor _input_gate_out2;
Tensor _input_gate_out3;
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index d1cc962940..60c8fa1226 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -24,14 +24,14 @@
#ifndef ARM_COMPUTE_NEQLSTMLAYER_H
#define ARM_COMPUTE_NEQLSTMLAYER_H
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
@@ -48,7 +48,7 @@ class ITensor;
* This function calls the following NEON functions/kernels:
*
* -# @ref NEActivationLayer Activation functions (tanh and logistic)
- * -# @ref NEArithmeticAdditionKernel Elementwise addition
+ * -# @ref NEArithmeticAddition Elementwise addition
* -# @ref NEArithmeticSubtractionKernel Elementwise subtraction
* -# @ref NECopyKernel Copy kernel for copying output_state_out to output
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
@@ -254,51 +254,51 @@ private:
NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
NEGEMMLowpMatrixAReductionKernel _projection_reduction{};
- NEArithmeticAdditionKernel _projection_bias_add{};
+ NEArithmeticAddition _projection_bias_add{};
NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_forget{};
NEGEMMLowpOutputStage _input_to_forget_outstage{};
NEGEMMLowpOutputStage _recurrent_to_forget_outstage{};
NEGEMMLowpOutputStage _cell_to_forget_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_forget{};
- NEArithmeticAdditionKernel _accumulate_cell_forget{};
+ NEArithmeticAddition _accumulate_input_recurrent_forget{};
+ NEArithmeticAddition _accumulate_cell_forget{};
NEActivationLayer _forget_gate_sigmoid{};
NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
NEGEMMLowpOutputStage _input_to_cell_outstage{};
NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
NEGEMMLowpOutputStage _recurrent_to_cell_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_modulation{};
+ NEArithmeticAddition _accumulate_input_recurrent_modulation{};
NEActivationLayer _cell_gate_tanh{};
- NEArithmeticSubtractionKernel _input_gate_sub{};
+ NEArithmeticSubtraction _input_gate_sub{};
NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
NEGEMMLowpOutputStage _input_to_input_outstage{};
NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
NEGEMMLowpOutputStage _recurrent_to_input_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_input{};
+ NEArithmeticAddition _accumulate_input_recurrent_input{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{};
NEGEMMLowpOutputStage _cell_to_input_outstage{};
- NEArithmeticAdditionKernel _accumulate_cell_input{};
+ NEArithmeticAddition _accumulate_cell_input{};
NEActivationLayer _input_gate_sigmoid{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{};
- NEArithmeticAdditionKernel _add_forget_cell{};
+ NEArithmeticAddition _add_forget_cell{};
NEActivationLayer _cell_clip{};
NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
NEGEMMLowpOutputStage _input_to_output_outstage{};
NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
NEGEMMLowpOutputStage _recurrent_to_output_outstage{};
- NEArithmeticAdditionKernel _accumulate_input_recurrent_output{};
+ NEArithmeticAddition _accumulate_input_recurrent_output{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{};
NEGEMMLowpOutputStage _cell_to_output_outstage{};
- NEArithmeticAdditionKernel _accumulate_cell_to_output{};
+ NEArithmeticAddition _accumulate_cell_to_output{};
NEActivationLayer _output_gate_sigmoid{};
NEActivationLayer _hidden_tanh{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_hidden{};
NEGEMMLowpOutputStage _hidden_outstage{};
NEGEMMLowpMatrixMultiplyCore _mm_projection{};
NEGEMMLowpOutputStage _projection_outstage{};
- NEArithmeticAdditionKernel _accumulate_projection{};
+ NEArithmeticAddition _accumulate_projection{};
NEActivationLayer _projection_clip{};
TensorCopyKernel _projection_bias_copy{};
@@ -311,7 +311,10 @@ private:
NECopyKernel _copy_output{};
// Tensor pointers
- const ITensor *_input_to_input_weights{ nullptr };
+ const ITensor *_input_to_input_weights
+ {
+ nullptr
+ };
const ITensor *_recurrent_to_input_weights{ nullptr };
const ITensor *_projection_bias{ nullptr };
const ITensor *_input_to_forget_weights{ nullptr };
@@ -370,7 +373,10 @@ private:
{
// Output quantization scale will be different, but ignored here
// since it will be configured at configure() stage.
- const TensorInfo out{ in };
+ const TensorInfo out
+ {
+ in
+ };
return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
}
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index db4134fd2d..25cb74d978 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -24,11 +24,11 @@
#ifndef ARM_COMPUTE_NERNNLAYER_H
#define ARM_COMPUTE_NERNNLAYER_H
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
@@ -82,16 +82,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_state_f;
- NEArithmeticAdditionKernel _add_kernel;
- NEActivationLayer _activation;
- NEFullyConnectedLayer _fully_connected;
- NECopyKernel _copy_kernel;
- Tensor _fully_connected_out;
- Tensor _gemm_output;
- Tensor _add_output;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ NEGEMM _gemm_state_f;
+ NEArithmeticAddition _add_f;
+ NEActivationLayer _activation;
+ NEFullyConnectedLayer _fully_connected;
+ NECopyKernel _copy_kernel;
+ Tensor _fully_connected_out;
+ Tensor _gemm_output;
+ Tensor _add_output;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NERNNLAYER_H */