diff options
Diffstat (limited to 'arm_compute/runtime/NEON')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 47 |
1 files changed, 45 insertions, 2 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index 0553e4f266..9eb0654cfe 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -178,7 +178,8 @@ private: Output, Count }; - static constexpr uint8_t _layer_norm_count = static_cast<uint8_t>(LayerNormGate::Count); + static constexpr uint8_t _layer_norm_count = static_cast<uint8_t>(LayerNormGate::Count); + static constexpr uint32_t _out_state_output_size_dimension_idx = 0; /** Internal method to configure matrix multiplication plus output stage of each gate. * @@ -201,6 +202,35 @@ private: MemoryGroup _memory_group{}; + /** A small internel kernel do the copy between two tensors */ + class TensorCopyKernel + { + static constexpr uint32_t max_dimension_supported = 2; + + ITensor *_src{ nullptr }; + ITensor *_dst{ nullptr }; + size_t _row_size{}; + Window _window{}; + + public: + /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel + * + * @param[in] src Source tensor info. + * @param[in] dst Destination tensor info + * + * @return a status + */ + static Status validate(const ITensorInfo &src, const ITensorInfo &dst); + /** Set the input and output tensors. + * + * @param[in] src Source tensor + * @param[out] dst Destination tensor + */ + void configure(ITensor &src, ITensor &dst); + /** run the kernel */ + void run(); + }; + // Functions used NETranspose _transpose_input_to_forget_weights{}; NETranspose _transpose_input_to_cell_weights{}; @@ -245,7 +275,7 @@ private: NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{}; NEGEMMLowpOutputStage _cell_to_input_outstage{}; NEArithmeticAdditionKernel _accumulate_cell_input{}; - NEActivationLayer _input_gate_tanh{}; + NEActivationLayer _input_gate_sigmoid{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{}; NEArithmeticAdditionKernel _add_forget_cell{}; @@ -256,6 +286,7 @@ private: NEGEMMLowpOutputStage _recurrent_to_output_outstage{}; NEArithmeticAdditionKernel _accumulate_input_recurrent_output{}; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{}; + NEGEMMLowpOutputStage _cell_to_output_outstage{}; NEArithmeticAdditionKernel _accumulate_cell_to_output{}; NEActivationLayer _output_gate_sigmoid{}; NEActivationLayer _hidden_tanh{}; @@ -265,6 +296,12 @@ private: NEGEMMLowpOutputStage _projection_outstage{}; NEArithmeticAdditionKernel _accumulate_projection{}; NEActivationLayer _projection_clip{}; + + TensorCopyKernel _projection_bias_copy{}; + TensorCopyKernel _projection_output_to_accumulate_copy{}; + TensorCopyKernel _projection_accumulate_to_output_copy{}; + TensorCopyKernel _hidden_to_output_copy{}; + std::array<NEQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} }; // Tensor pointers @@ -375,11 +412,16 @@ private: Tensor _input_to_output_outstage_res{ nullptr }; Tensor _mm_recurrent_to_output_res{ nullptr }; Tensor _mul_cell_to_output_res{ nullptr }; + Tensor _cell_to_output_outstage_res{ nullptr }; Tensor _recurrent_to_output_outstage_res{ nullptr }; Tensor _output_gate{ nullptr }; Tensor _hidden_mul_res{ nullptr }; + Tensor _hidden_gate{ nullptr }; Tensor _mm_projection_res{ nullptr }; Tensor _projection_outstage_res{ nullptr }; + Tensor _projection_out_res{ nullptr }; + Tensor _projection_eff_bias_adjusted{ nullptr }; + Tensor _projection_accumulate_res{ nullptr }; Tensor _ones{ nullptr }; std::array<Tensor, _layer_norm_count> _layer_norm_output{ {} }; @@ -395,6 +437,7 @@ private: bool _has_projection_clipping{ false }; bool _has_peephole{ false }; bool _has_layer_norm{ false }; + bool _projection_tensor_copy_required{ false }; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEQLSTMLAYER_H */ |