From 173ba9bbb19ea83f951318d9989e440768b4de8f Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 23 Jun 2020 17:25:43 +0100 Subject: COMPMID-3373: Async support to NEArithmetic* kernels/functions (Pt. 1) Added support on NEArithmeticAddition and NEArithmeticSubtraction Signed-off-by: Michalis Spyrou Change-Id: Ifa805f8455ef6eff1ee627752dc1c7fe9740ec47 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3451 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 17 +++-- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 11 ++-- .../runtime/NEON/functions/NEArithmeticAddition.h | 72 +++++++++++++++++++++- .../NEON/functions/NEArithmeticSubtraction.h | 65 ++++++++++++++++++- arm_compute/runtime/NEON/functions/NEGEMM.h | 4 +- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 18 +++--- arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 38 +++++++----- arm_compute/runtime/NEON/functions/NERNNLayer.h | 22 +++---- 8 files changed, 186 insertions(+), 61 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h index bff34dfda2..f254027e0e 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -68,12 +68,12 @@ public: * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED * - (QSYMM16,QSYMM16) -> QSYMM16 * - * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input1 First input tensor info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. * @param[in] policy Overflow policy. */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel * * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 @@ -86,7 +86,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised add functions @@ -99,11 +99,8 @@ private: */ using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window); /** Add function to use for the particular tensor types passed to configure() */ - AddFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - ConvertPolicy _policy; + AddFunction *_func; + ConvertPolicy _policy; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h index f75c6bfb98..dfd08d9b06 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ 
b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -71,7 +71,7 @@ public: * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32. * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized. */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel * * @note Convert policy cannot be WRAP if datatype is QASYMM8 @@ -86,7 +86,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised sub functions @@ -99,11 +99,8 @@ private: */ using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat); /** Sub function to use for the particular tensor types passed to configure() */ - SubFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - ConvertPolicy _policy; + SubFunction *_func; + ConvertPolicy _policy; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index 2bf12df4df..589e0624eb 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -25,16 +25,75 @@ #define ARM_COMPUTE_NEARITHMETICADDITION_H 
#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { class ITensor; +namespace experimental +{ /** Basic function to run @ref NEArithmeticAdditionKernel */ -class NEArithmeticAddition : public INESimpleFunctionNoBorder +class NEArithmeticAddition : public INEOperator { public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] output Output tensor info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] policy Policy to use to handle overflow + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; +}; +} // namespace experimental + +/** Basic function to run @ref NEArithmeticAdditionKernel */ +class NEArithmeticAddition : public IFunction +{ +public: + /** Default Constructor */ + NEArithmeticAddition(); + /** Default Destructor */ + ~NEArithmeticAddition(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition(const NEArithmeticAddition &) = delete; + /** Default move constructor */ + NEArithmeticAddition(NEArithmeticAddition &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete; + /** Default move assignment operator */ + NEArithmeticAddition &operator=(NEArithmeticAddition &&); /** Initialise the kernel's inputs, output and conversion policy. * * Valid configurations (Input1,Input2) -> Output : @@ -57,7 +116,7 @@ public: * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
*/ - void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 @@ -69,6 +128,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEARITHMETICADDITION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h index 31d1698aea..0bab911c1a 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -25,12 +25,52 @@ #define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { class ITensor; +namespace experimental +{ +/** Basic function to run @ref NEArithmeticSubtractionKernel + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/F32. + * @note The function performs an arithmetic subtraction between two tensors. 
+ * + * This function calls the following kernels: + * -# @ref NEArithmeticSubtractionKernel + */ +class NEArithmeticSubtraction : public INEOperator +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 + * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 + * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 + * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 + * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 + * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 + * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; +}; +} // namespace experimental + /** Basic function to run @ref NEArithmeticSubtractionKernel * * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/F32. @@ -39,9 +79,21 @@ class ITensor; * This function calls the following kernels: * -# @ref NEArithmeticSubtractionKernel */ -class NEArithmeticSubtraction : public INESimpleFunction +class NEArithmeticSubtraction : public IFunction { public: + /** Default Constructor */ + NEArithmeticSubtraction(); + /** Default Destructor */ + ~NEArithmeticSubtraction(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtraction(const NEArithmeticSubtraction &) = delete; + /** Default move constructor */ + NEArithmeticSubtraction(NEArithmeticSubtraction &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtraction &operator=(const NEArithmeticSubtraction &) = delete; + /** Default move assignment operator */ + NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&); /** Initialise the kernel's inputs, output and conversion policy. * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 @@ -50,7 +102,7 @@ public: * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
*/ - void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 @@ -62,6 +114,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 8dc6b88bb0..b89a373c47 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEGEMM_H #define ARM_COMPUTE_NEGEMM_H -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" @@ -35,6 +34,7 @@ #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" @@ -112,7 +112,7 @@ private: NEGEMMAssemblyDispatch _asm_glue; NEGEMMMatrixAdditionKernel 
_ma_kernel; NEActivationLayer _alpha_scale_func; - NEArithmeticAdditionKernel _add_bias_kernel; + NEArithmeticAddition _add_bias; NEActivationLayer _activation_func; Tensor _tmp_a; diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index 64845115b8..b9b581c484 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -25,13 +25,12 @@ #define ARM_COMPUTE_NELSTMLAYER_H #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" #include "arm_compute/core/NEON/kernels/NECopyKernel.h" #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" @@ -149,7 +149,7 @@ private: MemoryGroup _memory_group; NEFullyConnectedLayer _fully_connected_input_gate; NEArithmeticAddition _accum_input_gate1; - NEArithmeticSubtractionKernel _subtract_input_gate; + NEArithmeticSubtraction _subtract_input_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; NEActivationLayer _activation_input_gate; NEFullyConnectedLayer _fully_connected_forget_gate; @@ -159,8 +159,8 @@ NEFullyConnectedLayer _fully_connected_cell_state; NEGEMM _gemm_cell_state1; NETransposeKernel _transpose_cell_state; - NEArithmeticAdditionKernel _accum_cell_state1; - NEArithmeticAdditionKernel _accum_cell_state2; + NEArithmeticAddition _accum_cell_state1; + NEArithmeticAddition
_accum_cell_state2; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; NEActivationLayer _activation_cell_state; NEActivationLayer _cell_clip; @@ -182,16 +182,16 @@ private: NEConcatenateLayer _concat_weights_output; NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff; - NEArithmeticAdditionKernel _accum_input_gate_bias; + NEArithmeticAddition _accum_input_gate_bias; NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate_coeff; - NEArithmeticAdditionKernel _accum_forget_gate_bias; + NEArithmeticAddition _accum_forget_gate_bias; NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_gate_coeff; - NEArithmeticAdditionKernel _accum_cell_gate_bias; + NEArithmeticAddition _accum_cell_gate_bias; NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_output_gate_coeff; - NEArithmeticAdditionKernel _accum_output_gate_bias; + NEArithmeticAddition _accum_output_gate_bias; Tensor _input_gate_out1; Tensor _input_gate_out2; Tensor _input_gate_out3; diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index d1cc962940..60c8fa1226 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -24,14 +24,14 @@ #ifndef ARM_COMPUTE_NEQLSTMLAYER_H #define ARM_COMPUTE_NEQLSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" #include "arm_compute/core/NEON/kernels/NECopyKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include 
"arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" @@ -48,7 +48,7 @@ class ITensor; * This function calls the following NEON functions/kernels: * * -# @ref NEActivationLayer Activation functions (tanh and logistic) - * -# @ref NEArithmeticAdditionKernel Elementwise addition + * -# @ref NEArithmeticAddition Elementwise addition * -# @ref NEArithmeticSubtractionKernel Elementwise subtraction * -# @ref NECopyKernel Copy kernel for copying output_state_out to output * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers @@ -254,51 +254,51 @@ private: NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{}; NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{}; NEGEMMLowpMatrixAReductionKernel _projection_reduction{}; - NEArithmeticAdditionKernel _projection_bias_add{}; + NEArithmeticAddition _projection_bias_add{}; NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_forget{}; NEGEMMLowpOutputStage _input_to_forget_outstage{}; NEGEMMLowpOutputStage _recurrent_to_forget_outstage{}; NEGEMMLowpOutputStage _cell_to_forget_outstage{}; - NEArithmeticAdditionKernel _accumulate_input_recurrent_forget{}; - NEArithmeticAdditionKernel _accumulate_cell_forget{}; + NEArithmeticAddition _accumulate_input_recurrent_forget{}; + NEArithmeticAddition _accumulate_cell_forget{}; NEActivationLayer _forget_gate_sigmoid{}; NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; NEGEMMLowpOutputStage _input_to_cell_outstage{}; 
NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; NEGEMMLowpOutputStage _recurrent_to_cell_outstage{}; - NEArithmeticAdditionKernel _accumulate_input_recurrent_modulation{}; + NEArithmeticAddition _accumulate_input_recurrent_modulation{}; NEActivationLayer _cell_gate_tanh{}; - NEArithmeticSubtractionKernel _input_gate_sub{}; + NEArithmeticSubtraction _input_gate_sub{}; NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; NEGEMMLowpOutputStage _input_to_input_outstage{}; NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; NEGEMMLowpOutputStage _recurrent_to_input_outstage{}; - NEArithmeticAdditionKernel _accumulate_input_recurrent_input{}; + NEArithmeticAddition _accumulate_input_recurrent_input{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{}; NEGEMMLowpOutputStage _cell_to_input_outstage{}; - NEArithmeticAdditionKernel _accumulate_cell_input{}; + NEArithmeticAddition _accumulate_cell_input{}; NEActivationLayer _input_gate_sigmoid{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{}; - NEArithmeticAdditionKernel _add_forget_cell{}; + NEArithmeticAddition _add_forget_cell{}; NEActivationLayer _cell_clip{}; NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; NEGEMMLowpOutputStage _input_to_output_outstage{}; NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; NEGEMMLowpOutputStage _recurrent_to_output_outstage{}; - NEArithmeticAdditionKernel _accumulate_input_recurrent_output{}; + NEArithmeticAddition _accumulate_input_recurrent_output{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{}; NEGEMMLowpOutputStage _cell_to_output_outstage{}; - NEArithmeticAdditionKernel _accumulate_cell_to_output{}; + NEArithmeticAddition _accumulate_cell_to_output{}; NEActivationLayer _output_gate_sigmoid{}; NEActivationLayer _hidden_tanh{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_hidden{}; NEGEMMLowpOutputStage _hidden_outstage{}; NEGEMMLowpMatrixMultiplyCore 
_mm_projection{}; NEGEMMLowpOutputStage _projection_outstage{}; - NEArithmeticAdditionKernel _accumulate_projection{}; + NEArithmeticAddition _accumulate_projection{}; NEActivationLayer _projection_clip{}; TensorCopyKernel _projection_bias_copy{}; @@ -311,7 +311,10 @@ private: NECopyKernel _copy_output{}; // Tensor pointers - const ITensor *_input_to_input_weights{ nullptr }; + const ITensor *_input_to_input_weights + { + nullptr + }; const ITensor *_recurrent_to_input_weights{ nullptr }; const ITensor *_projection_bias{ nullptr }; const ITensor *_input_to_forget_weights{ nullptr }; @@ -370,7 +373,10 @@ private: { // Output quantization scale will be different, but ignored here // since it will be configured at configure() stage. - const TensorInfo out{ in }; + const TensorInfo out + { + in + }; return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); } diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index db4134fd2d..25cb74d978 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -24,11 +24,11 @@ #ifndef ARM_COMPUTE_NERNNLAYER_H #define ARM_COMPUTE_NERNNLAYER_H -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NECopyKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" @@ -82,16 +82,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAdditionKernel _add_kernel; - NEActivationLayer _activation; - NEFullyConnectedLayer _fully_connected; - NECopyKernel _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool 
_is_prepared; + MemoryGroup _memory_group; + NEGEMM _gemm_state_f; + NEArithmeticAddition _add_f; + NEActivationLayer _activation; + NEFullyConnectedLayer _fully_connected; + NECopyKernel _copy_kernel; + Tensor _fully_connected_out; + Tensor _gemm_output; + Tensor _add_output; + bool _is_prepared; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERNNLAYER_H */ -- cgit v1.2.1