From 173ba9bbb19ea83f951318d9989e440768b4de8f Mon Sep 17 00:00:00 2001
From: Michalis Spyrou
Date: Tue, 23 Jun 2020 17:25:43 +0100
Subject: COMPMID-3373: Async support to NEArithmetic* kernels/functions (Pt. 1)

Added support on NEArithmeticAddition and NEArithmeticSubtraction

Signed-off-by: Michalis Spyrou
Change-Id: Ifa805f8455ef6eff1ee627752dc1c7fe9740ec47
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3451
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 38 +++++++++++++----------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index d1cc962940..60c8fa1226 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -24,14 +24,14 @@
 #ifndef ARM_COMPUTE_NEQLSTMLAYER_H
 #define ARM_COMPUTE_NEQLSTMLAYER_H
 
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECopyKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
 #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
 #include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/NEON/functions/NETranspose.h"
@@ -48,7 +48,7 @@ class ITensor;
  * This function calls the following NEON functions/kernels:
  *
  * -# @ref NEActivationLayer              Activation functions (tanh and logistic)
- * -# @ref NEArithmeticAdditionKernel     Elementwise addition
+ * -# @ref NEArithmeticAddition           Elementwise addition
 * -# @ref NEArithmeticSubtractionKernel  Elementwise subtraction
 * -# @ref NECopyKernel                   Copy kernel for copying output_state_out to output
 * -# @ref NEGEMMLowpMatrixMultiplyCore   Quantized matrix multiplication core. Accumulators are 32-bit integers
@@ -254,51 +254,51 @@ private:
     NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
     NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
     NEGEMMLowpMatrixAReductionKernel _projection_reduction{};
-    NEArithmeticAdditionKernel _projection_bias_add{};
+    NEArithmeticAddition _projection_bias_add{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_forget{};
     NEGEMMLowpOutputStage _input_to_forget_outstage{};
     NEGEMMLowpOutputStage _recurrent_to_forget_outstage{};
     NEGEMMLowpOutputStage _cell_to_forget_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_forget{};
-    NEArithmeticAdditionKernel _accumulate_cell_forget{};
+    NEArithmeticAddition _accumulate_input_recurrent_forget{};
+    NEArithmeticAddition _accumulate_cell_forget{};
     NEActivationLayer _forget_gate_sigmoid{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
     NEGEMMLowpOutputStage _input_to_cell_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
     NEGEMMLowpOutputStage _recurrent_to_cell_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_modulation{};
+    NEArithmeticAddition _accumulate_input_recurrent_modulation{};
     NEActivationLayer _cell_gate_tanh{};
-    NEArithmeticSubtractionKernel _input_gate_sub{};
+    NEArithmeticSubtraction _input_gate_sub{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
     NEGEMMLowpOutputStage _input_to_input_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
     NEGEMMLowpOutputStage _recurrent_to_input_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_input{};
+    NEArithmeticAddition _accumulate_input_recurrent_input{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{};
     NEGEMMLowpOutputStage _cell_to_input_outstage{};
-    NEArithmeticAdditionKernel _accumulate_cell_input{};
+    NEArithmeticAddition _accumulate_cell_input{};
     NEActivationLayer _input_gate_sigmoid{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{};
-    NEArithmeticAdditionKernel _add_forget_cell{};
+    NEArithmeticAddition _add_forget_cell{};
     NEActivationLayer _cell_clip{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
     NEGEMMLowpOutputStage _input_to_output_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
     NEGEMMLowpOutputStage _recurrent_to_output_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_output{};
+    NEArithmeticAddition _accumulate_input_recurrent_output{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{};
     NEGEMMLowpOutputStage _cell_to_output_outstage{};
-    NEArithmeticAdditionKernel _accumulate_cell_to_output{};
+    NEArithmeticAddition _accumulate_cell_to_output{};
     NEActivationLayer _output_gate_sigmoid{};
     NEActivationLayer _hidden_tanh{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_hidden{};
     NEGEMMLowpOutputStage _hidden_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_projection{};
     NEGEMMLowpOutputStage _projection_outstage{};
-    NEArithmeticAdditionKernel _accumulate_projection{};
+    NEArithmeticAddition _accumulate_projection{};
     NEActivationLayer _projection_clip{};
 
     TensorCopyKernel _projection_bias_copy{};
@@ -311,7 +311,10 @@ private:
     NECopyKernel _copy_output{};
 
     // Tensor pointers
-    const ITensor *_input_to_input_weights{ nullptr };
+    const ITensor *_input_to_input_weights
+    {
+        nullptr
+    };
     const ITensor *_recurrent_to_input_weights{ nullptr };
     const ITensor *_projection_bias{ nullptr };
     const ITensor *_input_to_forget_weights{ nullptr };
@@ -370,7 +373,10 @@ private:
     {
         // Output quantization scale will be different, but ignored here
         // since it will be configured at configure() stage.
-        const TensorInfo out{ in };
+        const TensorInfo out
+        {
+            in
+        };
         return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
     }
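
The net effect of this patch is that NEQLSTMLayer now holds NEArithmeticAddition/NEArithmeticSubtraction runtime functions as members instead of the raw kernels, so element-wise addition and subtraction are configured and scheduled through the function-level API. The sketch below shows that function-level configure/run pattern for NEArithmeticAddition in isolation; it is a minimal, illustrative example rather than code from this patch, and the tensor shapes, F32 data type, and SATURATE convert policy are assumed placeholder values.

// Minimal sketch (assumed example values): using the NEArithmeticAddition
// runtime function instead of scheduling NEArithmeticAdditionKernel directly.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Declare the input/output tensors and describe their shape and type.
    Tensor a, b, sum;
    const TensorShape shape(16U, 16U); // illustrative shape
    a.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    b.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    sum.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    // Validate the configuration, then configure the function once.
    NEArithmeticAddition add;
    ARM_COMPUTE_ERROR_THROW_ON(NEArithmeticAddition::validate(a.info(), b.info(), sum.info(), ConvertPolicy::SATURATE));
    add.configure(&a, &b, &sum, ConvertPolicy::SATURATE);

    // Allocate backing memory after configuration.
    a.allocator()->allocate();
    b.allocator()->allocate();
    sum.allocator()->allocate();

    // run() dispatches the underlying kernel; it can be called repeatedly.
    add.run();
    return 0;
}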