From 0cbfda629dd8f684e625173341bab972f004222c Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Thu, 13 Jun 2019 17:01:29 +0100 Subject: COMPMID-2343: Add layer normalization support in NELSTMLayer Change-Id: I1f620d70c6eaadfb9e3a1b345de350ac0253b65c Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/1366 Tested-by: Arm Jenkins Reviewed-by: Manuel Bottini Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas --- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 80 +++++++++++++++--------- 1 file changed, 49 insertions(+), 31 deletions(-) (limited to 'arm_compute/runtime') diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index cf0f06c215..183745c185 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -35,6 +35,7 @@ #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" +#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -68,14 +69,18 @@ public: * @param[out] output Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. * Data types supported: Same as @p input. * @param[in] lstm_params (Optional) Weights tensors used in peephole optimization: - * input_to_input_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. - * recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. - * cell_to_input_weights 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input. - * cell_to_forget_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. 
- * cell_to_output_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. - * input_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input - * projection_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. - * projection_bias 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input. + * input_to_input_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. + * recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * cell_to_input_weights 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input. + * cell_to_forget_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * cell_to_output_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * input_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * projection_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * projection_bias 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input. + * input_layer_norm_coefficients 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * forget_layer_norm_coefficients 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * cell_layer_norm_coefficients 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * output_layer_norm_coefficients 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. * @param[in] activation_info Contains activation information described in @ref ActivationLayerInfo. 
* @param[in] cell_threshold The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled. * @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. @@ -108,14 +113,18 @@ public: * @param[in] output Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. * Data types supported: Same as @p input. * @param[in] lstm_params (Optional) Weights tensors used in peephole optimization: - * input_to_input_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. - * recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. - * cell_to_input_weights 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input. - * cell_to_forget_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. - * cell_to_output_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. - * input_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input - * projection_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. - * projection_bias 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input. + * input_to_input_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. + * recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * cell_to_input_weights 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input. 
+ * cell_to_forget_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * cell_to_output_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * input_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * projection_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * projection_bias 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input. + * input_layer_norm_coefficients 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input. + * forget_layer_norm_coefficients 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input. + * cell_layer_norm_coefficients 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input. + * output_layer_norm_coefficients 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input. * @param[in] activation_info Contains activation information described in @ref ActivationLayerInfo. * @param[in] cell_threshold The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled. * @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. 
@@ -137,23 +146,16 @@ public: private: MemoryGroup _memory_group; NEFullyConnectedLayer _fully_connected_input_gate; - NEGEMM _gemm_input_gate; - NETransposeKernel _transpose_input_gate; - NEArithmeticAdditionKernel _accum_input_gate1; - NEArithmeticAddition _accum_input_gate2; + NEArithmeticAddition _accum_input_gate1; NEArithmeticSubtractionKernel _subtract_input_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; NEActivationLayerKernel _activation_input_gate; NEFullyConnectedLayer _fully_connected_forget_gate; - NEGEMM _gemm_forget_gate; - NETransposeKernel _transpose_forget_gate; - NEArithmeticAdditionKernel _accum_forget_gate1; - NEArithmeticAddition _accum_forget_gate2; + NEArithmeticAddition _accum_forget_gate1; NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; NEActivationLayerKernel _activation_forget_gate; NEFullyConnectedLayer _fully_connected_cell_state; NEGEMM _gemm_cell_state1; - NEGEMM _gemm_cell_state2; NETransposeKernel _transpose_cell_state; NEArithmeticAdditionKernel _accum_cell_state1; NEArithmeticAdditionKernel _accum_cell_state2; @@ -162,17 +164,12 @@ private: NEActivationLayerKernel _cell_clip; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; NEFullyConnectedLayer _fully_connected_output; - NEGEMM _gemm_output; NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; - NETransposeKernel _transpose_output; - NEArithmeticAdditionKernel _accum_output1; - NEArithmeticAddition _accum_output2; + NEArithmeticAddition _accum_output1; NEActivationLayerKernel _activation_output; NEActivationLayerKernel _activation_output_state; NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state2; NEFullyConnectedLayer _fully_connected_output_state; - NEGEMM _gemm_output_state; - NEArithmeticAdditionKernel _accum_output_state; NEActivationLayerKernel _projection_clip; NECopyKernel _copy_cell_state; NECopyKernel _copy_output; @@ -181,6 +178,18 @@ private: NEConcatenateLayer _concat_weights_forget_gate; NEConcatenateLayer 
_concat_weights_input_gate; NEConcatenateLayer _concat_weights_output; + NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate; + NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff; + NEArithmeticAdditionKernel _accum_input_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; + NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate_coeff; + NEArithmeticAdditionKernel _accum_forget_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; + NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_gate_coeff; + NEArithmeticAdditionKernel _accum_cell_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate; + NEPixelWiseMultiplicationKernel _pixelwise_mul_output_gate_coeff; + NEArithmeticAdditionKernel _accum_output_gate_bias; Tensor _input_gate_out1; Tensor _input_gate_out2; Tensor _input_gate_out3; @@ -203,12 +212,21 @@ private: Tensor _cell_state_activation; Tensor _output_state1; Tensor _ones; + Tensor _input_layer_norm_out1; + Tensor _input_layer_norm_out2; + Tensor _forget_layer_norm_out1; + Tensor _forget_layer_norm_out2; + Tensor _cell_layer_norm_out1; + Tensor _cell_layer_norm_out2; + Tensor _output_layer_norm_out1; + Tensor _output_layer_norm_out2; bool _run_peephole_opt; bool _run_cifg_opt; bool _perform_cell_clipping; bool _has_projection_weights; bool _perform_projection_clipping; bool _is_prepared; + bool _is_layer_norm_lstm; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NELSTMLAYER_H__ */ -- cgit v1.2.1