From 4a578b923ed000c67fe0bc1433f945aea634ca9c Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 25 Jun 2021 12:13:49 +0100 Subject: Port the ClGemmLowp kernels to the new API Ported kernels: - CLGEMMLowpMatrixMultiplyNativeKernel - CLGEMMLowpMatrixMultiplyReshapedKernel - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel - CLGEMMLowpOffsetContributionKernel - CLGEMMLowpOffsetContributionOutputStageKernel - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel - CLGEMMLowpQuantizeDownInt32ScaleKernel Signed-off-by: Georgios Pinitas Change-Id: I9d5a744d6a2dd2f2726fdfb291bad000b6970de2 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5870 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- .../runtime/CL/functions/CLLSTMLayerQuantized.h | 68 +++++++++++----------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h') diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h index 2ef7427a5a..9c004b85d0 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h @@ -47,16 +47,16 @@ class ICLTensor; * * This function calls the following CL functions/kernels: * - * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 - * -# @ref CLTranspose Matrix transpose - * -# @ref CLConcatenateLayer Tensor concatenation - * -# @ref CLActivationLayer Activation functions (tanh and logistic) - * -# @ref CLArithmeticAddition Elementwise addition - * -# @ref CLPixelWiseMultiplication Elementwise multiplication - * -# @ref CLSlice Tensor slicing - * -# @ref CLDequantizationLayer Dequantize into float - * -# @ref CLQuantizationLayer Quantize from float + * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers + * -# @ref CLGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 + * -# @ref CLTranspose Matrix transpose + * -# @ref CLConcatenateLayer Tensor concatenation + * -# @ref CLActivationLayer Activation functions (tanh and logistic) + * -# @ref CLArithmeticAddition Elementwise addition + * -# @ref CLPixelWiseMultiplication Elementwise multiplication + * -# @ref CLSlice Tensor slicing + * -# @ref CLDequantizationLayer Dequantize into float + * -# @ref CLQuantizationLayer Quantize from float * */ class CLLSTMLayerQuantized : public IFunction { @@ -170,30 +170,30 @@ private: MemoryGroup _memory_group; // Functions used - CLGEMMLowpMatrixMultiplyCore _gemmlowp; - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; - CLTranspose _transpose_weights; - CLConcatenateLayer _concat_input_weights; - CLConcatenateLayer _concat_recurrent_weights; - CLConcatenateLayer _concat_weights; - CLConcatenateLayer _concat_inputs; - CLConcatenateLayer _concat_bias; - CLActivationLayer _sigmoid_forget_gate; - CLActivationLayer _sigmoid_input_gate; - CLActivationLayer _sigmoid_output_gate; - CLActivationLayer _tanh_modulation_gate; - CLActivationLayer _tanh_output_state; - CLArithmeticAddition _add_cell_state_tmps; - CLArithmeticAddition _add2; - CLPixelWiseMultiplication _mul_forget_gate_cell_state; - CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; - CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; - CLSlice _slice_input_tensor; - CLSlice _slice_forget_tensor; - CLSlice _slice_cell_tensor; - CLSlice _slice_output_tensor; - CLDequantizationLayer _dequantize; - CLQuantizationLayer _quantize; + CLGEMMLowpMatrixMultiplyCore _gemmlowp; + CLGEMMLowpOutputStage _output_stage; + CLTranspose _transpose_weights; + CLConcatenateLayer _concat_input_weights; + CLConcatenateLayer _concat_recurrent_weights; + CLConcatenateLayer _concat_weights; + CLConcatenateLayer _concat_inputs; + CLConcatenateLayer _concat_bias; + CLActivationLayer _sigmoid_forget_gate; + CLActivationLayer _sigmoid_input_gate; + CLActivationLayer _sigmoid_output_gate; + CLActivationLayer _tanh_modulation_gate; + CLActivationLayer _tanh_output_state; + CLArithmeticAddition _add_cell_state_tmps; + CLArithmeticAddition _add2; + CLPixelWiseMultiplication _mul_forget_gate_cell_state; + CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; + CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; + CLSlice _slice_input_tensor; + CLSlice _slice_forget_tensor; + CLSlice _slice_cell_tensor; + CLSlice _slice_output_tensor; + CLDequantizationLayer _dequantize; + CLQuantizationLayer _quantize; // Tensor pointers const ICLTensor *_input_to_input_weights; -- cgit v1.2.1