diff options
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLQLSTMLayer.h')
-rw-r--r-- | arm_compute/runtime/CL/functions/CLQLSTMLayer.h | 380 |
1 files changed, 209 insertions, 171 deletions
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h index bd00d56468..3e76da086f 100644 --- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h @@ -32,7 +32,6 @@ #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" #include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" #include "arm_compute/runtime/CL/functions/CLTranspose.h" - #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -40,9 +39,15 @@ namespace arm_compute // Forward declarations class CLCompileContext; class ICLTensor; -class CLGEMMLowpMatrixAReductionKernel; class CLQLSTMLayerNormalizationKernel; class ITensorInfo; +namespace opencl +{ +namespace kernels +{ +class ClGemmLowpMatrixAReductionKernel; +} // namespace kernels +} // namespace opencl /** Basic function to run @ref CLQLSTMLayer * @@ -52,8 +57,8 @@ class ITensorInfo; * -# @ref CLCopy Copy function for copying output_state_out to output * -# @ref CLArithmeticAddition Elementwise addition and subtraction * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 - * -# @ref CLGEMMLowpMatrixAReductionKernel For precomputing effective biases to use + * -# @ref CLGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 + * -# @ref opencl::kernels::ClGemmLowpMatrixAReductionKernel For precomputing effective biases to use * -# @ref CLPixelWiseMultiplication Elementwise multiplication * -# @ref CLTranspose Transpose function for reshaping the weights * */ @@ -121,12 +126,21 @@ public: * projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. */ - void configure(const ICLTensor *input, - const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, - const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, - const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, - ICLTensor *cell_state_in, ICLTensor *output_state_in, - ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output, + void configure(const ICLTensor *input, + const ICLTensor *input_to_forget_weights, + const ICLTensor *input_to_cell_weights, + const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_forget_weights, + const ICLTensor *recurrent_to_cell_weights, + const ICLTensor *recurrent_to_output_weights, + const ICLTensor *forget_gate_bias, + const ICLTensor *cell_bias, + const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, + ICLTensor *output_state_in, + ICLTensor *cell_state_out, + ICLTensor *output_state_out, + ICLTensor *output, const LSTMParams<ICLTensor> &lstm_params); /** Initialize function's tensors. @@ -171,12 +185,22 @@ public: * projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, - const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, - const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, - const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, - ICLTensor *cell_state_in, ICLTensor *output_state_in, - ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output, + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *input_to_forget_weights, + const ICLTensor *input_to_cell_weights, + const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_forget_weights, + const ICLTensor *recurrent_to_cell_weights, + const ICLTensor *recurrent_to_output_weights, + const ICLTensor *forget_gate_bias, + const ICLTensor *cell_bias, + const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, + ICLTensor *output_state_in, + ICLTensor *cell_state_out, + ICLTensor *output_state_out, + ICLTensor *output, const LSTMParams<ICLTensor> &lstm_params); /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer @@ -221,12 +245,21 @@ public: * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. * @return a status */ - static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, + static Status validate(const ITensorInfo *input, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out, + const ITensorInfo *output, const LSTMParams<ITensorInfo> &lstm_params); // Inherited methods overridden: @@ -260,10 +293,18 @@ private: * @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor. * */ - void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info, - const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res, - CLTensor *outstage_res, float gemmlowp_scale, - const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info); + void configure_mm(const CLCompileContext &compile_context, + CLGEMMLowpMatrixMultiplyCore &mm, + CLGEMMLowpOutputStage &outstage, + GEMMLowpOutputStageInfo &gemmlowp_info, + const ICLTensor *mm_input, + const ICLTensor *mm_weights, + const ICLTensor *bias, + CLTensor *mm_res, + CLTensor *outstage_res, + float gemmlowp_scale, + const TensorInfo &mm_res_info, + const TensorInfo &outstage_tensor_info); MemoryGroup _memory_group{}; @@ -272,8 +313,8 @@ private: { static constexpr uint32_t max_dimension_supported = 2; - ICLTensor *_src{ nullptr }; - ICLTensor *_dst{ nullptr }; + ICLTensor *_src{nullptr}; + ICLTensor *_dst{nullptr}; size_t _row_size{}; Window _window{}; @@ -297,72 +338,72 @@ private: }; // Functions used - CLTranspose _transpose_input_to_forget_weights{}; - CLTranspose _transpose_input_to_cell_weights{}; - CLTranspose _transpose_input_to_output_weights{}; - CLTranspose _transpose_input_to_input_weights{}; - CLTranspose _transpose_recurrent_to_forget_weights{}; - CLTranspose _transpose_recurrent_to_cell_weights{}; - CLTranspose _transpose_recurrent_to_output_weights{}; - CLTranspose _transpose_recurrent_to_input_weights{}; - CLTranspose _transpose_projection_weights{}; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_input_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_output_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction; - std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _projection_reduction; - CLArithmeticAddition _projection_bias_add{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; - CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{}; - CLGEMMLowpOutputStage _input_to_forget_outstage{}; - CLGEMMLowpOutputStage _recurrent_to_forget_outstage{}; - CLGEMMLowpOutputStage _cell_to_forget_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_forget{}; - CLArithmeticAddition _accumulate_cell_forget{}; - CLActivationLayer _forget_gate_sigmoid{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; - CLGEMMLowpOutputStage _input_to_cell_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; - CLGEMMLowpOutputStage _recurrent_to_cell_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_modulation{}; - CLActivationLayer _cell_gate_tanh{}; - CLArithmeticSubtraction _input_gate_sub{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; - CLGEMMLowpOutputStage _input_to_input_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; - CLGEMMLowpOutputStage _recurrent_to_input_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_input{}; - CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{}; - CLGEMMLowpOutputStage _cell_to_input_outstage{}; - CLArithmeticAddition _accumulate_cell_input{}; - CLActivationLayer _input_gate_sigmoid{}; - CLPixelWiseMultiplication _pixelwise_mul_forget_cell{}; - CLPixelWiseMultiplication _pixelwise_mul_input_cell{}; - CLArithmeticAddition _add_forget_cell{}; - CLActivationLayer _cell_clip{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; - CLGEMMLowpOutputStage _input_to_output_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; - CLGEMMLowpOutputStage _recurrent_to_output_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_output{}; - CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{}; - CLGEMMLowpOutputStage _cell_to_output_outstage{}; - CLArithmeticAddition _accumulate_cell_to_output{}; - CLActivationLayer _output_gate_sigmoid{}; - CLActivationLayer _hidden_tanh{}; - CLPixelWiseMultiplication _pixelwise_mul_hidden{}; - CLGEMMLowpOutputStage _hidden_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_projection{}; - CLGEMMLowpOutputStage _projection_outstage{}; - CLArithmeticAddition _accumulate_projection{}; - CLActivationLayer _projection_clip{}; + CLTranspose _transpose_input_to_forget_weights{}; + CLTranspose _transpose_input_to_cell_weights{}; + CLTranspose _transpose_input_to_output_weights{}; + CLTranspose _transpose_input_to_input_weights{}; + CLTranspose _transpose_recurrent_to_forget_weights{}; + CLTranspose _transpose_recurrent_to_cell_weights{}; + CLTranspose _transpose_recurrent_to_output_weights{}; + CLTranspose _transpose_recurrent_to_input_weights{}; + CLTranspose _transpose_projection_weights{}; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_input_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_forget_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_cell_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_output_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction; + std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _projection_reduction; + CLArithmeticAddition _projection_bias_add{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; + CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{}; + CLGEMMLowpOutputStage _input_to_forget_outstage{}; + CLGEMMLowpOutputStage _recurrent_to_forget_outstage{}; + CLGEMMLowpOutputStage _cell_to_forget_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_forget{}; + CLArithmeticAddition _accumulate_cell_forget{}; + CLActivationLayer _forget_gate_sigmoid{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; + CLGEMMLowpOutputStage _input_to_cell_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; + CLGEMMLowpOutputStage _recurrent_to_cell_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_modulation{}; + CLActivationLayer _cell_gate_tanh{}; + CLArithmeticSubtraction _input_gate_sub{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; + CLGEMMLowpOutputStage _input_to_input_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; + CLGEMMLowpOutputStage _recurrent_to_input_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_input{}; + CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{}; + CLGEMMLowpOutputStage _cell_to_input_outstage{}; + CLArithmeticAddition _accumulate_cell_input{}; + CLActivationLayer _input_gate_sigmoid{}; + CLPixelWiseMultiplication _pixelwise_mul_forget_cell{}; + CLPixelWiseMultiplication _pixelwise_mul_input_cell{}; + CLArithmeticAddition _add_forget_cell{}; + CLActivationLayer _cell_clip{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; + CLGEMMLowpOutputStage _input_to_output_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; + CLGEMMLowpOutputStage _recurrent_to_output_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_output{}; + CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{}; + CLGEMMLowpOutputStage _cell_to_output_outstage{}; + CLArithmeticAddition _accumulate_cell_to_output{}; + CLActivationLayer _output_gate_sigmoid{}; + CLActivationLayer _hidden_tanh{}; + CLPixelWiseMultiplication _pixelwise_mul_hidden{}; + CLGEMMLowpOutputStage _hidden_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_projection{}; + CLGEMMLowpOutputStage _projection_outstage{}; + CLArithmeticAddition _accumulate_projection{}; + CLActivationLayer _projection_clip{}; std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms; - CLCopy _copy_output; + CLCopy _copy_output; TensorCopyKernel _projection_bias_copy{}; TensorCopyKernel _projection_output_to_accumulate_copy{}; @@ -370,21 +411,18 @@ private: TensorCopyKernel _hidden_to_output_copy{}; // Tensor pointers - const ICLTensor *_input_to_input_weights - { - nullptr - }; - const ICLTensor *_recurrent_to_input_weights{ nullptr }; - const ICLTensor *_projection_bias{ nullptr }; - const ICLTensor *_input_to_forget_weights{ nullptr }; - const ICLTensor *_input_to_cell_weights{ nullptr }; - const ICLTensor *_input_to_output_weights{ nullptr }; - const ICLTensor *_recurrent_to_forget_weights{ nullptr }; - const ICLTensor *_recurrent_to_cell_weights{ nullptr }; - const ICLTensor *_recurrent_to_output_weights{ nullptr }; - const ICLTensor *_projection_weights{ nullptr }; - std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{ {} }; - std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{ {} }; + const ICLTensor *_input_to_input_weights{nullptr}; + const ICLTensor *_recurrent_to_input_weights{nullptr}; + const ICLTensor *_projection_bias{nullptr}; + const ICLTensor *_input_to_forget_weights{nullptr}; + const ICLTensor *_input_to_cell_weights{nullptr}; + const ICLTensor *_input_to_output_weights{nullptr}; + const ICLTensor *_recurrent_to_forget_weights{nullptr}; + const ICLTensor *_recurrent_to_cell_weights{nullptr}; + const ICLTensor *_recurrent_to_output_weights{nullptr}; + const ICLTensor *_projection_weights{nullptr}; + std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{{}}; + std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{{}}; using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type; inline LayerNormIndexType getGateIndex(LayerNormGate g) @@ -417,78 +455,78 @@ private: return *_layer_norms[getGateIndex(g)]; } - inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in); + inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in); inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias); // Temporary tensors - CLTensor _input_to_forget_weights_transposed{ nullptr }; - CLTensor _input_to_cell_weights_transposed{ nullptr }; - CLTensor _input_to_output_weights_transposed{ nullptr }; - CLTensor _input_to_input_weights_transposed{ nullptr }; - CLTensor _recurrent_to_forget_weights_transposed{ nullptr }; - CLTensor _recurrent_to_cell_weights_transposed{ nullptr }; - CLTensor _recurrent_to_output_weights_transposed{ nullptr }; - CLTensor _recurrent_to_input_weights_transposed{ nullptr }; - CLTensor _projection_weights_transposed{ nullptr }; - CLTensor _input_to_input_eff_bias{ nullptr }; - CLTensor _recurrent_to_input_eff_bias{ nullptr }; - CLTensor _input_to_forget_eff_bias{ nullptr }; - CLTensor _recurrent_to_forget_eff_bias{ nullptr }; - CLTensor _input_to_cell_eff_bias{ nullptr }; - CLTensor _recurrent_to_cell_eff_bias{ nullptr }; - CLTensor _input_to_output_eff_bias{ nullptr }; - CLTensor _recurrent_to_output_eff_bias{ nullptr }; - CLTensor _projection_reduction_res{ nullptr }; - CLTensor _projection_eff_bias{ nullptr }; - CLTensor _mm_input_to_forget_res{ nullptr }; - CLTensor _mm_recurrent_to_forget_res{ nullptr }; - CLTensor _mul_cell_to_forget_res{ nullptr }; - CLTensor _input_to_forget_outstage_res{ nullptr }; - CLTensor _cell_to_forget_outstage_res{ nullptr }; - CLTensor _recurrent_to_forget_outstage_res{ nullptr }; - CLTensor _forget_gate{ nullptr }; - CLTensor _mm_input_to_cell_res{ nullptr }; - CLTensor _input_to_cell_outstage_res{ nullptr }; - CLTensor _mm_recurrent_to_cell_res{ nullptr }; - CLTensor _recurrent_to_cell_outstage_res{ nullptr }; - CLTensor _cell_gate{ nullptr }; - CLTensor _mul_input_cell_res{ nullptr }; - CLTensor _mm_input_to_input_res{ nullptr }; - CLTensor _input_to_input_outstage_res{ nullptr }; - CLTensor _mm_recurrent_to_input_res{ nullptr }; - CLTensor _mul_cell_to_input_res{ nullptr }; - CLTensor _cell_to_input_outstage_res{ nullptr }; - CLTensor _recurrent_to_input_outstage_res{ nullptr }; - CLTensor _input_gate{ nullptr }; - CLTensor _mm_input_to_output_res{ nullptr }; - CLTensor _input_to_output_outstage_res{ nullptr }; - CLTensor _mm_recurrent_to_output_res{ nullptr }; - CLTensor _mul_cell_to_output_res{ nullptr }; - CLTensor _cell_to_output_outstage_res{ nullptr }; - CLTensor _recurrent_to_output_outstage_res{ nullptr }; - CLTensor _output_gate{ nullptr }; - CLTensor _hidden_mul_res{ nullptr }; - CLTensor _hidden_gate{ nullptr }; - CLTensor _mm_projection_res{ nullptr }; - CLTensor _projection_outstage_res{ nullptr }; - CLTensor _projection_out_res{ nullptr }; - CLTensor _projection_accumulate_res{ nullptr }; - CLTensor _ones{ nullptr }; - std::array<CLTensor, _layer_norm_count> _layer_norm_output{ {} }; + CLTensor _input_to_forget_weights_transposed{nullptr}; + CLTensor _input_to_cell_weights_transposed{nullptr}; + CLTensor _input_to_output_weights_transposed{nullptr}; + CLTensor _input_to_input_weights_transposed{nullptr}; + CLTensor _recurrent_to_forget_weights_transposed{nullptr}; + CLTensor _recurrent_to_cell_weights_transposed{nullptr}; + CLTensor _recurrent_to_output_weights_transposed{nullptr}; + CLTensor _recurrent_to_input_weights_transposed{nullptr}; + CLTensor _projection_weights_transposed{nullptr}; + CLTensor _input_to_input_eff_bias{nullptr}; + CLTensor _recurrent_to_input_eff_bias{nullptr}; + CLTensor _input_to_forget_eff_bias{nullptr}; + CLTensor _recurrent_to_forget_eff_bias{nullptr}; + CLTensor _input_to_cell_eff_bias{nullptr}; + CLTensor _recurrent_to_cell_eff_bias{nullptr}; + CLTensor _input_to_output_eff_bias{nullptr}; + CLTensor _recurrent_to_output_eff_bias{nullptr}; + CLTensor _projection_reduction_res{nullptr}; + CLTensor _projection_eff_bias{nullptr}; + CLTensor _mm_input_to_forget_res{nullptr}; + CLTensor _mm_recurrent_to_forget_res{nullptr}; + CLTensor _mul_cell_to_forget_res{nullptr}; + CLTensor _input_to_forget_outstage_res{nullptr}; + CLTensor _cell_to_forget_outstage_res{nullptr}; + CLTensor _recurrent_to_forget_outstage_res{nullptr}; + CLTensor _forget_gate{nullptr}; + CLTensor _mm_input_to_cell_res{nullptr}; + CLTensor _input_to_cell_outstage_res{nullptr}; + CLTensor _mm_recurrent_to_cell_res{nullptr}; + CLTensor _recurrent_to_cell_outstage_res{nullptr}; + CLTensor _cell_gate{nullptr}; + CLTensor _mul_input_cell_res{nullptr}; + CLTensor _mm_input_to_input_res{nullptr}; + CLTensor _input_to_input_outstage_res{nullptr}; + CLTensor _mm_recurrent_to_input_res{nullptr}; + CLTensor _mul_cell_to_input_res{nullptr}; + CLTensor _cell_to_input_outstage_res{nullptr}; + CLTensor _recurrent_to_input_outstage_res{nullptr}; + CLTensor _input_gate{nullptr}; + CLTensor _mm_input_to_output_res{nullptr}; + CLTensor _input_to_output_outstage_res{nullptr}; + CLTensor _mm_recurrent_to_output_res{nullptr}; + CLTensor _mul_cell_to_output_res{nullptr}; + CLTensor _cell_to_output_outstage_res{nullptr}; + CLTensor _recurrent_to_output_outstage_res{nullptr}; + CLTensor _output_gate{nullptr}; + CLTensor _hidden_mul_res{nullptr}; + CLTensor _hidden_gate{nullptr}; + CLTensor _mm_projection_res{nullptr}; + CLTensor _projection_outstage_res{nullptr}; + CLTensor _projection_out_res{nullptr}; + CLTensor _projection_accumulate_res{nullptr}; + CLTensor _ones{nullptr}; + std::array<CLTensor, _layer_norm_count> _layer_norm_output{{}}; inline CLTensor &get_layer_norm_output(LayerNormGate g) { return _layer_norm_output[getGateIndex(g)]; } - bool _is_prepared{ false }; - bool _has_cifg{ false }; - bool _has_cell_clipping{ false }; - bool _has_projection{ false }; - bool _has_projection_clipping{ false }; - bool _has_peephole{ false }; - bool _has_layer_norm{ false }; - bool _projection_tensor_copy_required{ false }; + bool _is_prepared{false}; + bool _has_cifg{false}; + bool _has_cell_clipping{false}; + bool _has_projection{false}; + bool _has_projection_clipping{false}; + bool _has_peephole{false}; + bool _has_layer_norm{false}; + bool _projection_tensor_copy_required{false}; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLQLSTMLAYER_H */ |