diff options
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NELSTMLayer.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NELSTMLayer.h | 254 |
1 files changed, 149 insertions, 105 deletions
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index e85e87b88e..629c5d10a0 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,18 @@ #ifndef ARM_COMPUTE_NELSTMLAYER_H #define ARM_COMPUTE_NELSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/common/LSTMParams.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" -#include "arm_compute/runtime/common/LSTMParams.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" namespace arm_compute { @@ -49,8 +48,27 @@ class NELSTMLayer : public IFunction public: /** Default constructor */ NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer &operator=(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer(NELSTMLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer &operator=(NELSTMLayer &&) = delete; + /** Default destructor */ + ~NELSTMLayer(); /** Initialize function's tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src13 | dst0 - dst3 | + * |:------------|:------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. @@ -86,13 +104,26 @@ public: * @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. * If set to 0.0 then clipping is disabled. */ - void configure(const ITensor *input, - const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - const ITensor *output_state_in, const ITensor *cell_state_in, - ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output, - const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); + void configure(const ITensor *input, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + const ITensor *output_state_in, + const ITensor *cell_state_in, + ITensor *scratch_buffer, + ITensor *output_state_out, + ITensor *cell_state_out, + ITensor *output, + const LSTMParams<ITensor> &lstm_params, + const ActivationLayerInfo &activation_info, + float cell_threshold = 0.f, + float projection_threshold = 0.f); /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer * @@ -133,102 +164,115 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in, - const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output, - const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); + static Status validate(const ITensorInfo *input, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_in, + const ITensorInfo *scratch_buffer, + const ITensorInfo *output_state_out, + const ITensorInfo *cell_state_out, + const ITensorInfo *output, + const LSTMParams<ITensorInfo> &lstm_params, + const ActivationLayerInfo &activation_info, + float cell_threshold = 0.f, + float projection_threshold = 0.f); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - NEFullyConnectedLayer _fully_connected_input_gate; - NEArithmeticAddition _accum_input_gate1; - NEArithmeticSubtractionKernel _subtract_input_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; - NEActivationLayerKernel _activation_input_gate; - NEFullyConnectedLayer _fully_connected_forget_gate; - NEArithmeticAddition _accum_forget_gate1; - NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; - NEActivationLayerKernel _activation_forget_gate; - NEFullyConnectedLayer _fully_connected_cell_state; - NEGEMM _gemm_cell_state1; - NETransposeKernel _transpose_cell_state; - NEArithmeticAdditionKernel _accum_cell_state1; - NEArithmeticAdditionKernel _accum_cell_state2; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; - NEActivationLayerKernel _activation_cell_state; - NEActivationLayerKernel _cell_clip; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; - NEFullyConnectedLayer _fully_connected_output; - NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; - NEArithmeticAddition _accum_output1; - NEActivationLayerKernel _activation_output; - NEActivationLayerKernel _activation_output_state; - NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state2; - NEFullyConnectedLayer _fully_connected_output_state; - NEActivationLayerKernel _projection_clip; - NECopyKernel _copy_cell_state; - NECopyKernel _copy_output; - NEConcatenateLayer _concat_scratch_buffer; - NEConcatenateLayer _concat_inputs_forget_gate; - NEConcatenateLayer _concat_weights_forget_gate; - NEConcatenateLayer _concat_weights_input_gate; - NEConcatenateLayer _concat_weights_output; - NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff; - NEArithmeticAdditionKernel _accum_input_gate_bias; - NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate_coeff; - NEArithmeticAdditionKernel _accum_forget_gate_bias; - NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_gate_coeff; - NEArithmeticAdditionKernel _accum_cell_gate_bias; - NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_output_gate_coeff; - NEArithmeticAdditionKernel _accum_output_gate_bias; - Tensor _input_gate_out1; - Tensor _input_gate_out2; - Tensor _input_gate_out3; - Tensor _input_gate_out4; - Tensor _forget_gate_out1; - Tensor _forget_gate_out2; - Tensor _forget_gate_out3; - Tensor _forget_gate_out4; - Tensor _forget_gate_out5; - Tensor _forget_gate_out6; - Tensor _cell_state_out1; - Tensor _cell_state_out2; - Tensor _cell_state_out3; - Tensor _cell_state_out4; - Tensor _cell_state_out5; - Tensor _output1; - Tensor _output2; - Tensor _output3; - Tensor _output4; - Tensor _cell_state_activation; - Tensor _output_state1; - Tensor _ones; - Tensor _input_layer_norm_out1; - Tensor _input_layer_norm_out2; - Tensor _forget_layer_norm_out1; - Tensor _forget_layer_norm_out2; - Tensor _cell_layer_norm_out1; - Tensor _cell_layer_norm_out2; - Tensor _output_layer_norm_out1; - Tensor _output_layer_norm_out2; - bool _run_peephole_opt; - bool _run_cifg_opt; - bool _perform_cell_clipping; - bool _has_projection_weights; - bool _perform_projection_clipping; - bool _is_prepared; - bool _is_layer_norm_lstm; + MemoryGroup _memory_group; + NEFullyConnectedLayer _fully_connected_input_gate; + NEArithmeticAddition _accum_input_gate1; + NEArithmeticSubtraction _subtract_input_gate; + NEPixelWiseMultiplication _pixelwise_mul_input_gate; + NEActivationLayer _activation_input_gate; + NEFullyConnectedLayer _fully_connected_forget_gate; + NEArithmeticAddition _accum_forget_gate1; + NEPixelWiseMultiplication _pixelwise_mul_forget_gate; + NEActivationLayer _activation_forget_gate; + NEFullyConnectedLayer _fully_connected_cell_state; + NEGEMM _gemm_cell_state1; + NETranspose _transpose_cell_state; + NEArithmeticAddition _accum_cell_state1; + NEArithmeticAddition _accum_cell_state2; + NEPixelWiseMultiplication _pixelwise_mul_cell_state1; + NEActivationLayer _activation_cell_state; + NEActivationLayer _cell_clip; + NEPixelWiseMultiplication _pixelwise_mul_cell_state2; + NEFullyConnectedLayer _fully_connected_output; + NEPixelWiseMultiplication _pixelwise_mul_output_state1; + NEArithmeticAddition _accum_output1; + NEActivationLayer _activation_output; + NEActivationLayer _activation_output_state; + NEPixelWiseMultiplication _pixelwise_mul_output_state2; + NEFullyConnectedLayer _fully_connected_output_state; + NEActivationLayer _projection_clip; + NECopy _copy_cell_state; + NECopy _copy_output; + NEConcatenateLayer _concat_scratch_buffer; + NEConcatenateLayer _concat_inputs_forget_gate; + NEConcatenateLayer _concat_weights_forget_gate; + NEConcatenateLayer _concat_weights_input_gate; + NEConcatenateLayer _concat_weights_output; + NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate; + NEPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; + NEArithmeticAddition _accum_input_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; + NEPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; + NEArithmeticAddition _accum_forget_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; + NEPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; + NEArithmeticAddition _accum_cell_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate; + NEPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; + NEArithmeticAddition _accum_output_gate_bias; + Tensor _input_gate_out1; + Tensor _input_gate_out2; + Tensor _input_gate_out3; + Tensor _input_gate_out4; + Tensor _forget_gate_out1; + Tensor _forget_gate_out2; + Tensor _forget_gate_out3; + Tensor _forget_gate_out4; + Tensor _forget_gate_out5; + Tensor _forget_gate_out6; + Tensor _cell_state_out1; + Tensor _cell_state_out2; + Tensor _cell_state_out3; + Tensor _cell_state_out4; + Tensor _cell_state_out5; + Tensor _output1; + Tensor _output2; + Tensor _output3; + Tensor _output4; + Tensor _cell_state_activation; + Tensor _output_state1; + Tensor _ones; + Tensor _input_layer_norm_out1; + Tensor _input_layer_norm_out2; + Tensor _forget_layer_norm_out1; + Tensor _forget_layer_norm_out2; + Tensor _cell_layer_norm_out1; + Tensor _cell_layer_norm_out2; + Tensor _output_layer_norm_out1; + Tensor _output_layer_norm_out2; + bool _run_peephole_opt; + bool _run_cifg_opt; + bool _perform_cell_clipping; + bool _has_projection_weights; + bool _perform_projection_clipping; + bool _is_prepared; + bool _is_layer_norm_lstm; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NELSTMLAYER_H */ |