Port the ClGemmLowp kernels to the new API

Ported kernels:
 - CLGEMMLowpMatrixMultiplyNativeKernel
 - CLGEMMLowpMatrixMultiplyReshapedKernel
 - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
 - CLGEMMLowpOffsetContributionKernel
 - CLGEMMLowpOffsetContributionOutputStageKernel
 - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
 - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
 - CLGEMMLowpQuantizeDownInt32ScaleKernel

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I9d5a744d6a2dd2f2726fdfb291bad000b6970de2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5870
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2021-06-25 12:13:49 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2021-06-29 16:26:41 +0000
commit: 4a578b923ed000c67fe0bc1433f945aea634ca9c (patch)
tree: b7bb041d2e7bfb4b909199f1b889585d237c665d /arm_compute/runtime/CL/functions/CLQLSTMLayer.h
parent: 53832b2bcce44c71fe31a618a81765294df55750 (diff)
download: ComputeLibrary-4a578b923ed000c67fe0bc1433f945aea634ca9c.tar.gz
1 files changed, 73 insertions, 67 deletions
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
index bd00d56468..1b0b759d74 100644
--- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
@@ -40,9 +40,15 @@ namespace arm_compute
 // Forward declarations
 class CLCompileContext;
 class ICLTensor;
-class CLGEMMLowpMatrixAReductionKernel;
 class CLQLSTMLayerNormalizationKernel;
 class ITensorInfo;
+namespace opencl
+{
+namespace kernels
+{
+class ClGemmLowpMatrixAReductionKernel;
+} // namespace kernels
+} // namespace opencl
 
 /** Basic function to run @ref CLQLSTMLayer
  *
@@ -52,8 +58,8 @@ class ITensorInfo;
  * -# @ref CLCopy                                                Copy function for copying output_state_out to output
  * -# @ref CLArithmeticAddition                                  Elementwise addition and subtraction
  * -# @ref CLGEMMLowpMatrixMultiplyCore                          Quantized matrix multiplication core. Accumulators are 32-bit integers
- * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint   Convert 32-bit integers into QSYMM16
- * -# @ref CLGEMMLowpMatrixAReductionKernel                      For precomputing effective biases to use
+ * -# @ref CLGEMMLowpOutputStage   Convert 32-bit integers into QSYMM16
+ * -# @ref opencl::kernels::ClGemmLowpMatrixAReductionKernel                      For precomputing effective biases to use
  * -# @ref CLPixelWiseMultiplication                             Elementwise multiplication
  * -# @ref CLTranspose                                           Transpose function for reshaping the weights
  * */
@@ -297,70 +303,70 @@ private:
     };
 
     // Functions used
-    CLTranspose                                       _transpose_input_to_forget_weights{};
-    CLTranspose                                       _transpose_input_to_cell_weights{};
-    CLTranspose                                       _transpose_input_to_output_weights{};
-    CLTranspose                                       _transpose_input_to_input_weights{};
-    CLTranspose                                       _transpose_recurrent_to_forget_weights{};
-    CLTranspose                                       _transpose_recurrent_to_cell_weights{};
-    CLTranspose                                       _transpose_recurrent_to_output_weights{};
-    CLTranspose                                       _transpose_recurrent_to_input_weights{};
-    CLTranspose                                       _transpose_projection_weights{};
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
-    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _projection_reduction;
-    CLArithmeticAddition                              _projection_bias_add{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_input_to_forget{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_recurrent_to_forget{};
-    CLPixelWiseMultiplication                         _pixelwise_mul_cell_to_forget{};
-    CLGEMMLowpOutputStage                             _input_to_forget_outstage{};
-    CLGEMMLowpOutputStage                             _recurrent_to_forget_outstage{};
-    CLGEMMLowpOutputStage                             _cell_to_forget_outstage{};
-    CLArithmeticAddition                              _accumulate_input_recurrent_forget{};
-    CLArithmeticAddition                              _accumulate_cell_forget{};
-    CLActivationLayer                                 _forget_gate_sigmoid{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_input_to_cell{};
-    CLGEMMLowpOutputStage                             _input_to_cell_outstage{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_recurrent_to_cell{};
-    CLGEMMLowpOutputStage                             _recurrent_to_cell_outstage{};
-    CLArithmeticAddition                              _accumulate_input_recurrent_modulation{};
-    CLActivationLayer                                 _cell_gate_tanh{};
-    CLArithmeticSubtraction                           _input_gate_sub{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_input_to_input{};
-    CLGEMMLowpOutputStage                             _input_to_input_outstage{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_recurrent_to_input{};
-    CLGEMMLowpOutputStage                             _recurrent_to_input_outstage{};
-    CLArithmeticAddition                              _accumulate_input_recurrent_input{};
-    CLPixelWiseMultiplication                         _pixelwise_mul_cell_to_input{};
-    CLGEMMLowpOutputStage                             _cell_to_input_outstage{};
-    CLArithmeticAddition                              _accumulate_cell_input{};
-    CLActivationLayer                                 _input_gate_sigmoid{};
-    CLPixelWiseMultiplication                         _pixelwise_mul_forget_cell{};
-    CLPixelWiseMultiplication                         _pixelwise_mul_input_cell{};
-    CLArithmeticAddition                              _add_forget_cell{};
-    CLActivationLayer                                 _cell_clip{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_input_to_output{};
-    CLGEMMLowpOutputStage                             _input_to_output_outstage{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_recurrent_to_output{};
-    CLGEMMLowpOutputStage                             _recurrent_to_output_outstage{};
-    CLArithmeticAddition                              _accumulate_input_recurrent_output{};
-    CLPixelWiseMultiplication                         _pixelwise_mul_cell_to_output{};
-    CLGEMMLowpOutputStage                             _cell_to_output_outstage{};
-    CLArithmeticAddition                              _accumulate_cell_to_output{};
-    CLActivationLayer                                 _output_gate_sigmoid{};
-    CLActivationLayer                                 _hidden_tanh{};
-    CLPixelWiseMultiplication                         _pixelwise_mul_hidden{};
-    CLGEMMLowpOutputStage                             _hidden_outstage{};
-    CLGEMMLowpMatrixMultiplyCore                      _mm_projection{};
-    CLGEMMLowpOutputStage                             _projection_outstage{};
-    CLArithmeticAddition                              _accumulate_projection{};
-    CLActivationLayer                                 _projection_clip{};
+    CLTranspose                                                        _transpose_input_to_forget_weights{};
+    CLTranspose                                                        _transpose_input_to_cell_weights{};
+    CLTranspose                                                        _transpose_input_to_output_weights{};
+    CLTranspose                                                        _transpose_input_to_input_weights{};
+    CLTranspose                                                        _transpose_recurrent_to_forget_weights{};
+    CLTranspose                                                        _transpose_recurrent_to_cell_weights{};
+    CLTranspose                                                        _transpose_recurrent_to_output_weights{};
+    CLTranspose                                                        _transpose_recurrent_to_input_weights{};
+    CLTranspose                                                        _transpose_projection_weights{};
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_input_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_forget_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_cell_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_output_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+    std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _projection_reduction;
+    CLArithmeticAddition                                               _projection_bias_add{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_input_to_forget{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_recurrent_to_forget{};
+    CLPixelWiseMultiplication                                          _pixelwise_mul_cell_to_forget{};
+    CLGEMMLowpOutputStage                                              _input_to_forget_outstage{};
+    CLGEMMLowpOutputStage                                              _recurrent_to_forget_outstage{};
+    CLGEMMLowpOutputStage                                              _cell_to_forget_outstage{};
+    CLArithmeticAddition                                               _accumulate_input_recurrent_forget{};
+    CLArithmeticAddition                                               _accumulate_cell_forget{};
+    CLActivationLayer                                                  _forget_gate_sigmoid{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_input_to_cell{};
+    CLGEMMLowpOutputStage                                              _input_to_cell_outstage{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_recurrent_to_cell{};
+    CLGEMMLowpOutputStage                                              _recurrent_to_cell_outstage{};
+    CLArithmeticAddition                                               _accumulate_input_recurrent_modulation{};
+    CLActivationLayer                                                  _cell_gate_tanh{};
+    CLArithmeticSubtraction                                            _input_gate_sub{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_input_to_input{};
+    CLGEMMLowpOutputStage                                              _input_to_input_outstage{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_recurrent_to_input{};
+    CLGEMMLowpOutputStage                                              _recurrent_to_input_outstage{};
+    CLArithmeticAddition                                               _accumulate_input_recurrent_input{};
+    CLPixelWiseMultiplication                                          _pixelwise_mul_cell_to_input{};
+    CLGEMMLowpOutputStage                                              _cell_to_input_outstage{};
+    CLArithmeticAddition                                               _accumulate_cell_input{};
+    CLActivationLayer                                                  _input_gate_sigmoid{};
+    CLPixelWiseMultiplication                                          _pixelwise_mul_forget_cell{};
+    CLPixelWiseMultiplication                                          _pixelwise_mul_input_cell{};
+    CLArithmeticAddition                                               _add_forget_cell{};
+    CLActivationLayer                                                  _cell_clip{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_input_to_output{};
+    CLGEMMLowpOutputStage                                              _input_to_output_outstage{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_recurrent_to_output{};
+    CLGEMMLowpOutputStage                                              _recurrent_to_output_outstage{};
+    CLArithmeticAddition                                               _accumulate_input_recurrent_output{};
+    CLPixelWiseMultiplication                                          _pixelwise_mul_cell_to_output{};
+    CLGEMMLowpOutputStage                                              _cell_to_output_outstage{};
+    CLArithmeticAddition                                               _accumulate_cell_to_output{};
+    CLActivationLayer                                                  _output_gate_sigmoid{};
+    CLActivationLayer                                                  _hidden_tanh{};
+    CLPixelWiseMultiplication                                          _pixelwise_mul_hidden{};
+    CLGEMMLowpOutputStage                                              _hidden_outstage{};
+    CLGEMMLowpMatrixMultiplyCore                                       _mm_projection{};
+    CLGEMMLowpOutputStage                                              _projection_outstage{};
+    CLArithmeticAddition                                               _accumulate_projection{};
+    CLActivationLayer                                                  _projection_clip{};
     std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
     CLCopy _copy_output;
author	Georgios Pinitas <georgios.pinitas@arm.com>	2021-06-25 12:13:49 +0100
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2021-06-29 16:26:41 +0000
commit	4a578b923ed000c67fe0bc1433f945aea634ca9c (patch)
tree	b7bb041d2e7bfb4b909199f1b889585d237c665d /arm_compute/runtime/CL/functions/CLQLSTMLayer.h
parent	53832b2bcce44c71fe31a618a81765294df55750 (diff)
download	ComputeLibrary-4a578b923ed000c67fe0bc1433f945aea634ca9c.tar.gz