aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
diff options
context:
space:
mode:
author Manuel Bottini <manuel.bottini@arm.com> 2021-06-18 15:47:28 +0100
committer Manuel Bottini <manuel.bottini@arm.com> 2021-07-08 14:47:38 +0000
commit cfac51c779f9bf05e8b2d386fbfb4022767d1d30 (patch)
tree 6ded148068c32bb1b2926946f59d0262d928b9ab /arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
parent 06ac6e438fc95aa7f8228be8217e0776d692b8e7 (diff)
download ComputeLibrary-cfac51c779f9bf05e8b2d386fbfb4022767d1d30.tar.gz
Port NEGEMMLowp Part 2
Details:
Extend NEConvertQuantizedSignednessKernel
Port NEGEMMInterleave4x4Kernel to CpuGemmInterleave4x4Kernel
Port NEGEMMTranspose1xWKernel to CpuGemmTranspose1xWKernel
Port NEGEMMLowpMatrixAReductionKernel to CpuGemmLowpMatrixAReductionKernel
Port NEGEMMLowpMatrixBReductionKernel to CpuGemmLowpMatrixBReductionKernel
Port NEGEMMLowpOffsetContributionOutputStageKernel to CpuGemmLowpOffsetContributionOutputStageKernel
Port NEGEMMLowpOffsetContributionKernel to CpuGemmLowpOffsetContributionKernel

Resolves: COMPMID-4403

Change-Id: I3227f052f25e7b41d073bbea1da8a881fcd78b8e
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5875
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEQLSTMLayer.h')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEQLSTMLayer.h 139
1 files changed, 72 insertions, 67 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index 77adffd543..acbd92cff7 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -43,8 +43,13 @@ namespace arm_compute
class ITensor;
class ITensorInfo;
class NEQLSTMLayerNormalizationKernel;
-class NEGEMMLowpMatrixAReductionKernel;
-
+namespace cpu
+{
+namespace kernels
+{
+class CpuGemmLowpMatrixAReductionKernel;
+} // namespace kernels
+} // namespace cpu
/** Basic function to run @ref NEQLSTMLayer
*
* This function calls the following kernels:
@@ -55,7 +60,7 @@ class NEGEMMLowpMatrixAReductionKernel;
* -# @ref NECopy Copy kernel for copying output_state_out to output
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
* -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16
- * -# @ref NEGEMMLowpMatrixAReductionKernel For precomputing effective biases to use
+ * -# @ref cpu::kernels::CpuGemmLowpMatrixAReductionKernel For precomputing effective biases to use
* -# @ref NEPixelWiseMultiplication Elementwise multiplication
* -# @ref NETranspose Transpose function for reshaping the weights
* */
@@ -250,70 +255,70 @@ private:
};
// Functions used
- NETranspose _transpose_input_to_forget_weights;
- NETranspose _transpose_input_to_cell_weights;
- NETranspose _transpose_input_to_output_weights;
- NETranspose _transpose_input_to_input_weights;
- NETranspose _transpose_recurrent_to_forget_weights;
- NETranspose _transpose_recurrent_to_cell_weights;
- NETranspose _transpose_recurrent_to_output_weights;
- NETranspose _transpose_recurrent_to_input_weights;
- NETranspose _transpose_projection_weights;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction;
- NEArithmeticAddition _projection_bias_add;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
- NEGEMMLowpOutputStage _input_to_forget_outstage;
- NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
- NEGEMMLowpOutputStage _cell_to_forget_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_forget;
- NEArithmeticAddition _accumulate_cell_forget;
- NEActivationLayer _forget_gate_sigmoid;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
- NEGEMMLowpOutputStage _input_to_cell_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
- NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_modulation;
- NEActivationLayer _cell_gate_tanh;
- NEArithmeticSubtraction _input_gate_sub;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
- NEGEMMLowpOutputStage _input_to_input_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
- NEGEMMLowpOutputStage _recurrent_to_input_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_input;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
- NEGEMMLowpOutputStage _cell_to_input_outstage;
- NEArithmeticAddition _accumulate_cell_input;
- NEActivationLayer _input_gate_sigmoid;
- NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
- NEPixelWiseMultiplication _pixelwise_mul_input_cell;
- NEArithmeticAddition _add_forget_cell;
- NEActivationLayer _cell_clip;
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
- NEGEMMLowpOutputStage _input_to_output_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
- NEGEMMLowpOutputStage _recurrent_to_output_outstage;
- NEArithmeticAddition _accumulate_input_recurrent_output;
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
- NEGEMMLowpOutputStage _cell_to_output_outstage;
- NEArithmeticAddition _accumulate_cell_to_output;
- NEActivationLayer _output_gate_sigmoid;
- NEActivationLayer _hidden_tanh;
- NEPixelWiseMultiplication _pixelwise_mul_hidden;
- NEGEMMLowpOutputStage _hidden_outstage;
- NEGEMMLowpMatrixMultiplyCore _mm_projection;
- NEGEMMLowpOutputStage _projection_outstage;
- NEArithmeticAddition _accumulate_projection;
- NEActivationLayer _projection_clip;
+ NETranspose _transpose_input_to_forget_weights;
+ NETranspose _transpose_input_to_cell_weights;
+ NETranspose _transpose_input_to_output_weights;
+ NETranspose _transpose_input_to_input_weights;
+ NETranspose _transpose_recurrent_to_forget_weights;
+ NETranspose _transpose_recurrent_to_cell_weights;
+ NETranspose _transpose_recurrent_to_output_weights;
+ NETranspose _transpose_recurrent_to_input_weights;
+ NETranspose _transpose_projection_weights;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _projection_reduction;
+ NEArithmeticAddition _projection_bias_add;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
+ NEGEMMLowpOutputStage _input_to_forget_outstage;
+ NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
+ NEGEMMLowpOutputStage _cell_to_forget_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_forget;
+ NEArithmeticAddition _accumulate_cell_forget;
+ NEActivationLayer _forget_gate_sigmoid;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
+ NEGEMMLowpOutputStage _input_to_cell_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
+ NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_modulation;
+ NEActivationLayer _cell_gate_tanh;
+ NEArithmeticSubtraction _input_gate_sub;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
+ NEGEMMLowpOutputStage _input_to_input_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
+ NEGEMMLowpOutputStage _recurrent_to_input_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_input;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
+ NEGEMMLowpOutputStage _cell_to_input_outstage;
+ NEArithmeticAddition _accumulate_cell_input;
+ NEActivationLayer _input_gate_sigmoid;
+ NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
+ NEPixelWiseMultiplication _pixelwise_mul_input_cell;
+ NEArithmeticAddition _add_forget_cell;
+ NEActivationLayer _cell_clip;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
+ NEGEMMLowpOutputStage _input_to_output_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
+ NEGEMMLowpOutputStage _recurrent_to_output_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_output;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
+ NEGEMMLowpOutputStage _cell_to_output_outstage;
+ NEArithmeticAddition _accumulate_cell_to_output;
+ NEActivationLayer _output_gate_sigmoid;
+ NEActivationLayer _hidden_tanh;
+ NEPixelWiseMultiplication _pixelwise_mul_hidden;
+ NEGEMMLowpOutputStage _hidden_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_projection;
+ NEGEMMLowpOutputStage _projection_outstage;
+ NEArithmeticAddition _accumulate_projection;
+ NEActivationLayer _projection_clip;
TensorCopyKernel _projection_bias_copy;
TensorCopyKernel _projection_output_to_accumulate_copy;