From 2788609b8a10306e9eae47543b39812a7b075aaa Mon Sep 17 00:00:00 2001 From: Teresa Charlin Date: Thu, 25 Feb 2021 20:15:01 +0000 Subject: Port ClTranspose to new API Partially Resolves: COMPMID-4277 (1/2) Signed-off-by: Teresa Charlin Change-Id: I704c2303135cbe1ba46d2fd5642c84c562204bc7 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5194 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 177 +++++++++++++------------ 1 file changed, 92 insertions(+), 85 deletions(-) (limited to 'arm_compute/runtime/CL/functions/CLLSTMLayer.h') diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 20b068316c..38a24d030b 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -46,8 +46,14 @@ namespace arm_compute { class CLCompileContext; -class CLTransposeKernel; class ICLTensor; +namespace opencl +{ +namespace kernels +{ +class ClTransposeKernel; +} +} /** This function performs a single time step in a Long Short-Term Memory (LSTM) layer. 
* @@ -212,90 +218,91 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - CLFullyConnectedLayer _fully_connected_input_gate; - CLArithmeticAddition _accum_input_gate1; - CLArithmeticSubtraction _subtract_input_gate; - CLPixelWiseMultiplication _pixelwise_mul_input_gate; - CLActivationLayer _activation_input_gate; - CLFullyConnectedLayer _fully_connected_forget_gate; - CLArithmeticAddition _accum_forget_gate1; - CLPixelWiseMultiplication _pixelwise_mul_forget_gate; - CLActivationLayer _activation_forget_gate; - CLFullyConnectedLayer _fully_connected_cell_state; - CLGEMM _gemm_cell_state1; - std::unique_ptr<CLTransposeKernel> _transpose_cell_state; - CLArithmeticAddition _accum_cell_state1; - CLArithmeticAddition _accum_cell_state2; - CLPixelWiseMultiplication _pixelwise_mul_cell_state1; - CLActivationLayer _activation_cell_state; - CLActivationLayer _cell_clip; - CLPixelWiseMultiplication _pixelwise_mul_cell_state2; - CLFullyConnectedLayer _fully_connected_output; - CLPixelWiseMultiplication _pixelwise_mul_output_state1; - CLArithmeticAddition _accum_output1; - CLActivationLayer _activation_output; - CLActivationLayer _activation_output_state; - CLPixelWiseMultiplication _pixelwise_mul_output_state2; - CLFullyConnectedLayer _fully_connected_output_state; - CLActivationLayer _projection_clip; - CLCopy _copy_cell_state; - CLCopy _copy_output; - CLConcatenateLayer _concat_scratch_buffer; - CLConcatenateLayer _concat_inputs_forget_gate; - CLConcatenateLayer _concat_weights_forget_gate; - CLConcatenateLayer _concat_weights_input_gate; - CLConcatenateLayer _concat_weights_output; - CLFill _ones_fill; - CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; - CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; - CLArithmeticAddition _accum_input_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; - CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; - CLArithmeticAddition _accum_forget_gate_bias; - CLMeanStdDevNormalizationLayer 
_mean_std_norm_cell_gate; - CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; - CLArithmeticAddition _accum_cell_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate; - CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; - CLArithmeticAddition _accum_output_gate_bias; - CLTensor _input_gate_out1; - CLTensor _input_gate_out2; - CLTensor _input_gate_out3; - CLTensor _input_gate_out4; - CLTensor _forget_gate_out1; - CLTensor _forget_gate_out2; - CLTensor _forget_gate_out3; - CLTensor _forget_gate_out4; - CLTensor _forget_gate_out5; - CLTensor _forget_gate_out6; - CLTensor _cell_state_out1; - CLTensor _cell_state_out2; - CLTensor _cell_state_out3; - CLTensor _cell_state_out4; - CLTensor _cell_state_out5; - CLTensor _output1; - CLTensor _output2; - CLTensor _output3; - CLTensor _output4; - CLTensor _cell_state_activation; - CLTensor _output_state1; - CLTensor _ones; - CLTensor _input_layer_norm_out1; - CLTensor _input_layer_norm_out2; - CLTensor _forget_layer_norm_out1; - CLTensor _forget_layer_norm_out2; - CLTensor _cell_layer_norm_out1; - CLTensor _cell_layer_norm_out2; - CLTensor _output_layer_norm_out1; - CLTensor _output_layer_norm_out2; - bool _run_peephole_opt; - bool _run_cifg_opt; - bool _perform_cell_clipping; - bool _has_projection_weights; - bool _perform_projection_clipping; - bool _is_prepared; - bool _is_layer_norm_lstm; + MemoryGroup _memory_group; + CLFullyConnectedLayer _fully_connected_input_gate; + CLArithmeticAddition _accum_input_gate1; + CLArithmeticSubtraction _subtract_input_gate; + CLPixelWiseMultiplication _pixelwise_mul_input_gate; + CLActivationLayer _activation_input_gate; + CLFullyConnectedLayer _fully_connected_forget_gate; + CLArithmeticAddition _accum_forget_gate1; + CLPixelWiseMultiplication _pixelwise_mul_forget_gate; + CLActivationLayer _activation_forget_gate; + CLFullyConnectedLayer _fully_connected_cell_state; + CLGEMM _gemm_cell_state1; + std::unique_ptr<opencl::kernels::ClTransposeKernel> _transpose_cell_state; 
CLArithmeticAddition _accum_cell_state1; + CLArithmeticAddition _accum_cell_state2; + CLPixelWiseMultiplication _pixelwise_mul_cell_state1; + CLActivationLayer _activation_cell_state; + CLActivationLayer _cell_clip; + CLPixelWiseMultiplication _pixelwise_mul_cell_state2; + CLFullyConnectedLayer _fully_connected_output; + CLPixelWiseMultiplication _pixelwise_mul_output_state1; + CLArithmeticAddition _accum_output1; + CLActivationLayer _activation_output; + CLActivationLayer _activation_output_state; + CLPixelWiseMultiplication _pixelwise_mul_output_state2; + CLFullyConnectedLayer _fully_connected_output_state; + CLActivationLayer _projection_clip; + CLCopy _copy_cell_state; + CLCopy _copy_output; + CLConcatenateLayer _concat_scratch_buffer; + CLConcatenateLayer _concat_inputs_forget_gate; + CLConcatenateLayer _concat_weights_forget_gate; + CLConcatenateLayer _concat_weights_input_gate; + CLConcatenateLayer _concat_weights_output; + CLFill _ones_fill; + CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; + CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; + CLArithmeticAddition _accum_input_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; + CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; + CLArithmeticAddition _accum_forget_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; + CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; + CLArithmeticAddition _accum_cell_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate; + CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; + CLArithmeticAddition _accum_output_gate_bias; + CLTensor _input_gate_out1; + CLTensor _input_gate_out2; + CLTensor _input_gate_out3; + CLTensor _input_gate_out4; + CLTensor _forget_gate_out1; + CLTensor _forget_gate_out2; + CLTensor _forget_gate_out3; + CLTensor _forget_gate_out4; + CLTensor _forget_gate_out5; + CLTensor _forget_gate_out6; + CLTensor _cell_state_out1; + CLTensor _cell_state_out2; + 
CLTensor _cell_state_out3; + CLTensor _cell_state_out4; + CLTensor _cell_state_out5; + CLTensor _output1; + CLTensor _output2; + CLTensor _output3; + CLTensor _output4; + CLTensor _cell_state_activation; + CLTensor _output_state1; + CLTensor _ones; + CLTensor _input_layer_norm_out1; + CLTensor _input_layer_norm_out2; + CLTensor _forget_layer_norm_out1; + CLTensor _forget_layer_norm_out2; + CLTensor _cell_layer_norm_out1; + CLTensor _cell_layer_norm_out2; + CLTensor _output_layer_norm_out1; + CLTensor _output_layer_norm_out2; + bool _run_peephole_opt; + bool _run_cifg_opt; + bool _perform_cell_clipping; + bool _has_projection_weights; + bool _perform_projection_clipping; + bool _is_prepared; + bool _is_layer_norm_lstm; + const ICLTensor *_recurrent_to_cell_weights{ nullptr }; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLLSTMLAYER_H */ -- cgit v1.2.1