diff options
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h')
-rw-r--r-- | arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h | 135 |
1 file changed, 84 insertions, 51 deletions
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h index 2ef7427a5a..8c116b1482 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h @@ -35,7 +35,6 @@ #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h" #include "arm_compute/runtime/CL/functions/CLSlice.h" #include "arm_compute/runtime/CL/functions/CLTranspose.h" - #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -47,16 +46,16 @@ class ICLTensor; * * This function calls the following CL functions/kernels: * - * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 - * -# @ref CLTranspose Matrix transpose - * -# @ref CLConcatenateLayer Tensor concatenation - * -# @ref CLActivationLayer Activation functions (tanh and logistic) - * -# @ref CLArithmeticAddition Elementwise addition - * -# @ref CLPixelWiseMultiplication Elementwise multiplication - * -# @ref CLSlice Tensor slicing - * -# @ref CLDequantizationLayer Dequantize into float - * -# @ref CLQuantizationLayer Quantize from float + * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. 
Accumulators are 32-bit integers + * -# @ref CLGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 + * -# @ref CLTranspose Matrix transpose + * -# @ref CLConcatenateLayer Tensor concatenation + * -# @ref CLActivationLayer Activation functions (tanh and logistic) + * -# @ref CLArithmeticAddition Elementwise addition + * -# @ref CLPixelWiseMultiplication Elementwise multiplication + * -# @ref CLSlice Tensor slicing + * -# @ref CLDequantizationLayer Dequantize into float + * -# @ref CLQuantizationLayer Quantize from float * */ class CLLSTMLayerQuantized : public IFunction { @@ -100,11 +99,22 @@ public: * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. */ void configure(const ICLTensor *input, - const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, - const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, - const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, - ICLTensor *cell_state_in, const ICLTensor *output_state_in, - ICLTensor *cell_state_out, ICLTensor *output_state_out); + const ICLTensor *input_to_input_weights, + const ICLTensor *input_to_forget_weights, + const ICLTensor *input_to_cell_weights, + const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_input_weights, + const ICLTensor *recurrent_to_forget_weights, + const ICLTensor *recurrent_to_cell_weights, + const ICLTensor *recurrent_to_output_weights, + const ICLTensor *input_gate_bias, + const ICLTensor *forget_gate_bias, + const ICLTensor *cell_bias, + const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, + const ICLTensor *output_state_in, + ICLTensor 
*cell_state_out, + ICLTensor *output_state_out); /** Initialize function's tensors. * * @param[in] compile_context The compile context to be used. @@ -126,12 +136,24 @@ public: * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, - const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, - const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, - const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, - ICLTensor *cell_state_in, const ICLTensor *output_state_in, - ICLTensor *cell_state_out, ICLTensor *output_state_out); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *input_to_input_weights, + const ICLTensor *input_to_forget_weights, + const ICLTensor *input_to_cell_weights, + const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_input_weights, + const ICLTensor *recurrent_to_forget_weights, + const ICLTensor *recurrent_to_cell_weights, + const ICLTensor *recurrent_to_output_weights, + const ICLTensor *input_gate_bias, + const ICLTensor *forget_gate_bias, + const ICLTensor *cell_bias, + const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, + const ICLTensor *output_state_in, + ICLTensor *cell_state_out, + ICLTensor *output_state_out); /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized * @@ -156,11 
+178,22 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); + const ITensorInfo *input_to_input_weights, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_input_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *input_gate_bias, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out); // Inherited methods overridden: void run() override; @@ -170,30 +203,30 @@ private: MemoryGroup _memory_group; // Functions used - CLGEMMLowpMatrixMultiplyCore _gemmlowp; - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; - CLTranspose _transpose_weights; - CLConcatenateLayer _concat_input_weights; - CLConcatenateLayer _concat_recurrent_weights; - CLConcatenateLayer _concat_weights; - CLConcatenateLayer _concat_inputs; - CLConcatenateLayer _concat_bias; - CLActivationLayer _sigmoid_forget_gate; - CLActivationLayer _sigmoid_input_gate; - 
CLActivationLayer _sigmoid_output_gate; - CLActivationLayer _tanh_modulation_gate; - CLActivationLayer _tanh_output_state; - CLArithmeticAddition _add_cell_state_tmps; - CLArithmeticAddition _add2; - CLPixelWiseMultiplication _mul_forget_gate_cell_state; - CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; - CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; - CLSlice _slice_input_tensor; - CLSlice _slice_forget_tensor; - CLSlice _slice_cell_tensor; - CLSlice _slice_output_tensor; - CLDequantizationLayer _dequantize; - CLQuantizationLayer _quantize; + CLGEMMLowpMatrixMultiplyCore _gemmlowp; + CLGEMMLowpOutputStage _output_stage; + CLTranspose _transpose_weights; + CLConcatenateLayer _concat_input_weights; + CLConcatenateLayer _concat_recurrent_weights; + CLConcatenateLayer _concat_weights; + CLConcatenateLayer _concat_inputs; + CLConcatenateLayer _concat_bias; + CLActivationLayer _sigmoid_forget_gate; + CLActivationLayer _sigmoid_input_gate; + CLActivationLayer _sigmoid_output_gate; + CLActivationLayer _tanh_modulation_gate; + CLActivationLayer _tanh_output_state; + CLArithmeticAddition _add_cell_state_tmps; + CLArithmeticAddition _add2; + CLPixelWiseMultiplication _mul_forget_gate_cell_state; + CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; + CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; + CLSlice _slice_input_tensor; + CLSlice _slice_forget_tensor; + CLSlice _slice_cell_tensor; + CLSlice _slice_output_tensor; + CLDequantizationLayer _dequantize; + CLQuantizationLayer _quantize; // Tensor pointers const ICLTensor *_input_to_input_weights; |