diff options
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h')
-rw-r--r-- | arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h | 135 |
1 file changed, 84 insertions, 51 deletions
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h index 2ef7427a5a..8c116b1482 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h @@ -35,7 +35,6 @@ #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h" #include "arm_compute/runtime/CL/functions/CLSlice.h" #include "arm_compute/runtime/CL/functions/CLTranspose.h" - #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -47,16 +46,16 @@ class ICLTensor; * * This function calls the following CL functions/kernels: * - * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 - * -# @ref CLTranspose Matrix transpose - * -# @ref CLConcatenateLayer Tensor concatenation - * -# @ref CLActivationLayer Activation functions (tanh and logistic) - * -# @ref CLArithmeticAddition Elementwise addition - * -# @ref CLPixelWiseMultiplication Elementwise multiplication - * -# @ref CLSlice Tensor slicing - * -# @ref CLDequantizationLayer Dequantize into float - * -# @ref CLQuantizationLayer Quantize from float + * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. 
Accumulators are 32-bit integers + * -# @ref CLGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 + * -# @ref CLTranspose Matrix transpose + * -# @ref CLConcatenateLayer Tensor concatenation + * -# @ref CLActivationLayer Activation functions (tanh and logistic) + * -# @ref CLArithmeticAddition Elementwise addition + * -# @ref CLPixelWiseMultiplication Elementwise multiplication + * -# @ref CLSlice Tensor slicing + * -# @ref CLDequantizationLayer Dequantize into float + * -# @ref CLQuantizationLayer Quantize from float * */ class CLLSTMLayerQuantized : public IFunction { @@ -100,11 +99,22 @@ public: * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. */ void configure(const ICLTensor *input, - const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, - const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, - const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, - ICLTensor *cell_state_in, const ICLTensor *output_state_in, - ICLTensor *cell_state_out, ICLTensor *output_state_out); + const ICLTensor *input_to_input_weights, + const ICLTensor *input_to_forget_weights, + const ICLTensor *input_to_cell_weights, + const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_input_weights, + const ICLTensor *recurrent_to_forget_weights, + const ICLTensor *recurrent_to_cell_weights, + const ICLTensor *recurrent_to_output_weights, + const ICLTensor *input_gate_bias, + const ICLTensor *forget_gate_bias, + const ICLTensor *cell_bias, + const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, + const ICLTensor *output_state_in, + ICLTensor 
*cell_state_out, + ICLTensor *output_state_out); /** Initialize function's tensors. * * @param[in] compile_context The compile context to be used. @@ -126,12 +136,24 @@ public: * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, - const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, - const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, - const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, - ICLTensor *cell_state_in, const ICLTensor *output_state_in, - ICLTensor *cell_state_out, ICLTensor *output_state_out); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *input_to_input_weights, + const ICLTensor *input_to_forget_weights, + const ICLTensor *input_to_cell_weights, + const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_input_weights, + const ICLTensor *recurrent_to_forget_weights, + const ICLTensor *recurrent_to_cell_weights, + const ICLTensor *recurrent_to_output_weights, + const ICLTensor *input_gate_bias, + const ICLTensor *forget_gate_bias, + const ICLTensor *cell_bias, + const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, + const ICLTensor *output_state_in, + ICLTensor *cell_state_out, + ICLTensor *output_state_out); /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized * @@ -156,11 
+178,22 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); + const ITensorInfo *input_to_input_weights, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_input_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *input_gate_bias, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out); // Inherited methods overridden: void run() override; @@ -170,30 +203,30 @@ private: MemoryGroup _memory_group; // Functions used - CLGEMMLowpMatrixMultiplyCore _gemmlowp; - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; - CLTranspose _transpose_weights; - CLConcatenateLayer _concat_input_weights; - CLConcatenateLayer _concat_recurrent_weights; - CLConcatenateLayer _concat_weights; - CLConcatenateLayer _concat_inputs; - CLConcatenateLayer _concat_bias; - CLActivationLayer _sigmoid_forget_gate; - CLActivationLayer _sigmoid_input_gate; - 
CLActivationLayer _sigmoid_output_gate; - CLActivationLayer _tanh_modulation_gate; - CLActivationLayer _tanh_output_state; - CLArithmeticAddition _add_cell_state_tmps; - CLArithmeticAddition _add2; - CLPixelWiseMultiplication _mul_forget_gate_cell_state; - CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; - CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; - CLSlice _slice_input_tensor; - CLSlice _slice_forget_tensor; - CLSlice _slice_cell_tensor; - CLSlice _slice_output_tensor; - CLDequantizationLayer _dequantize; - CLQuantizationLayer _quantize; + CLGEMMLowpMatrixMultiplyCore _gemmlowp; + CLGEMMLowpOutputStage _output_stage; + CLTranspose _transpose_weights; + CLConcatenateLayer _concat_input_weights; + CLConcatenateLayer _concat_recurrent_weights; + CLConcatenateLayer _concat_weights; + CLConcatenateLayer _concat_inputs; + CLConcatenateLayer _concat_bias; + CLActivationLayer _sigmoid_forget_gate; + CLActivationLayer _sigmoid_input_gate; + CLActivationLayer _sigmoid_output_gate; + CLActivationLayer _tanh_modulation_gate; + CLActivationLayer _tanh_output_state; + CLArithmeticAddition _add_cell_state_tmps; + CLArithmeticAddition _add2; + CLPixelWiseMultiplication _mul_forget_gate_cell_state; + CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; + CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; + CLSlice _slice_input_tensor; + CLSlice _slice_forget_tensor; + CLSlice _slice_cell_tensor; + CLSlice _slice_output_tensor; + CLDequantizationLayer _dequantize; + CLQuantizationLayer _quantize; // Tensor pointers const ICLTensor *_input_to_input_weights; |