From 2b84be544e4a27f7e8e80827e9c85c8f0d58b4ce Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 8 Apr 2020 10:15:51 +0100 Subject: COMPMID-3280: Make all ML primitives for CL use the new interface - Part 2 - CLFunctions have been updated Change-Id: Ie3256a6c775bc12f3126482bd8e8a46da54b267c Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3053 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/runtime/CL/functions/CLLSTMLayerQuantized.cpp | 68 +++++++++++++---------- 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'src/runtime/CL/functions/CLLSTMLayerQuantized.cpp') diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp index e5f127825b..c57fcc9f21 100644 --- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp +++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,6 +61,18 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input, const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, ICLTensor *cell_state_in, const ICLTensor *output_state_in, ICLTensor *cell_state_out, ICLTensor *output_state_out) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights, + recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, + output_state_out); +} + +void CLLSTMLayerQuantized::configure(const CLCompileContext &compile_context, const ICLTensor *input, + const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, + const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, const ICLTensor *output_state_in, + ICLTensor *cell_state_out, ICLTensor *output_state_out) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, @@ -107,18 +119,18 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input, recurrent_weights_vector.emplace_back(recurrent_to_output_weights); _input_weights.allocator()->init(TensorInfo(TensorShape(input_size, 4 * output_size), 1, DataType::QASYMM8, qweights)); - _concat_input_weights.configure(inputs_weights_vector, &_input_weights, Window::DimY); + _concat_input_weights.configure(compile_context, inputs_weights_vector, &_input_weights, Window::DimY); _recurrent_weights.allocator()->init(TensorInfo(TensorShape(output_size, 4 * output_size), 1, DataType::QASYMM8, qweights)); - _concat_recurrent_weights.configure(recurrent_weights_vector, &_recurrent_weights, Window::DimY); + _concat_recurrent_weights.configure(compile_context, recurrent_weights_vector, &_recurrent_weights, Window::DimY); std::vector weights_vector; weights_vector.emplace_back(&_recurrent_weights); weights_vector.emplace_back(&_input_weights); _weights.allocator()->init(TensorInfo(TensorShape(output_size + input_size, 4 * output_size), 1, DataType::QASYMM8, qweights)); - _concat_weights.configure(weights_vector, &_weights, Window::DimX); - _transpose_weights.configure(&_weights, &_weights_transposed); + _concat_weights.configure(compile_context, weights_vector, &_weights, Window::DimX); + _transpose_weights.configure(compile_context, &_weights, &_weights_transposed); // Input concatenation std::vector input_vector; @@ -127,7 +139,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input, _memory_group.manage(&_input); _input.allocator()->init(TensorInfo(TensorShape(output_size + input_size, batch_size), 1, DataType::QASYMM8, qasymm)); - _concat_inputs.configure(input_vector, &_input, Window::DimX); + _concat_inputs.configure(compile_context, input_vector, &_input, Window::DimX); // Bias concatenation std::vector bias_vector; @@ -137,7 +149,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input, bias_vector.emplace_back(output_gate_bias); _bias.allocator()->init(TensorInfo(TensorShape(4 * output_size), 1, DataType::S32)); - _concat_bias.configure(bias_vector, &_bias, Window::DimX); + _concat_bias.configure(compile_context, bias_vector, &_bias, Window::DimX); // Invert the offset for gemmlowp _input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, -qasymm.uniform().offset)); @@ -146,7 +158,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input, // Run gemmlowp _memory_group.manage(&_output_highp); _output_highp.allocator()->init(TensorInfo(TensorShape(4 * output_size, batch_size), 1, DataType::S32)); - _gemmlowp.configure(&_input, &_weights_transposed, nullptr, &_output_highp); + _gemmlowp.configure(compile_context, &_input, &_weights_transposed, nullptr, &_output_highp); _input.allocator()->allocate(); // Set the offset back @@ -162,7 +174,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input, quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); _memory_group.manage(&_output_lowp); - _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift); + _output_stage.configure(compile_context, &_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift); _output_highp.allocator()->allocate(); _bias.allocator()->allocate(); @@ -170,86 +182,86 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input, if(batch_size > 1) { _memory_group.manage(&_input_gate_input); - _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size }); + _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size }); _memory_group.manage(&_forget_gate_input); - _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size }); + _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size }); _memory_group.manage(&_input_modulation_gate_input); - _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size }); + _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size }); _memory_group.manage(&_output_gate_input); - _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size }); + _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size }); _output_lowp.allocator()->allocate(); } else { _memory_group.manage(&_input_gate_input); - _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0 }, { output_size }); + _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0 }, { output_size }); _memory_group.manage(&_forget_gate_input); - _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size }); + _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size }); _memory_group.manage(&_input_modulation_gate_input); - _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size }); + _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size }); _memory_group.manage(&_output_gate_input); - _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size }); + _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size }); _output_lowp.allocator()->allocate(); } // Forget gate _memory_group.manage(&_forget_gate_output); _forget_gate_output.allocator()->init(TensorInfo(_forget_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0)); - _sigmoid_forget_gate.configure(&_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + _sigmoid_forget_gate.configure(compile_context, &_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); _forget_gate_input.allocator()->allocate(); // Input gate _memory_group.manage(&_input_gate_output); _input_gate_output.allocator()->init(TensorInfo(_input_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0)); - _sigmoid_input_gate.configure(&_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + _sigmoid_input_gate.configure(compile_context, &_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); _input_gate_input.allocator()->allocate(); // Input modulation gate equation _memory_group.manage(&_input_modulation_gate_output); _input_modulation_gate_output.allocator()->init(TensorInfo(_input_modulation_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0)); - _tanh_modulation_gate.configure(&_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f)); + _tanh_modulation_gate.configure(compile_context, &_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f)); _input_modulation_gate_input.allocator()->allocate(); // Output gate _memory_group.manage(&_output_gate_output); _output_gate_output.allocator()->init(TensorInfo(_output_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0)); - _sigmoid_output_gate.configure(&_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + _sigmoid_output_gate.configure(compile_context, &_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); _output_gate_input.allocator()->allocate(); // Long term memory _memory_group.manage(&_cell_state_tmp1); _cell_state_tmp1.allocator()->init(TensorInfo(_forget_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4)); - _mul_forget_gate_cell_state.configure(&_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); + _mul_forget_gate_cell_state.configure(compile_context, &_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); _forget_gate_output.allocator()->allocate(); _memory_group.manage(&_cell_state_tmp2); _cell_state_tmp2.allocator()->init(TensorInfo(_input_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4)); - _mul_input_gate_input_mod_gate.configure(&_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); + _mul_input_gate_input_mod_gate.configure(compile_context, &_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); _input_modulation_gate_output.allocator()->allocate(); _input_gate_output.allocator()->allocate(); - _add_cell_state_tmps.configure(&_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE); + _add_cell_state_tmps.configure(compile_context, &_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE); _cell_state_tmp1.allocator()->allocate(); _cell_state_tmp2.allocator()->allocate(); // Short term memory _memory_group.manage(&_output_state_tmp); _output_state_tmp.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0)); - _tanh_output_state.configure(cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f)); + _tanh_output_state.configure(compile_context, cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f)); _memory_group.manage(&_output_state_out_symm); _output_state_out_symm.allocator()->init(TensorInfo(_output_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0)); - _mul_output_state_tmp_output_gate.configure(&_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); + _mul_output_state_tmp_output_gate.configure(compile_context, &_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); _output_gate_output.allocator()->allocate(); _output_state_tmp.allocator()->allocate(); // Requantize the output state from QSYMM16 to QASYMM8 _memory_group.manage(&_output_state_out_f32); _output_state_out_f32.allocator()->init(TensorInfo(_output_state_out_symm.info()->tensor_shape(), 1, DataType::F32)); - _dequantize.configure(&_output_state_out_symm, &_output_state_out_f32); + _dequantize.configure(compile_context, &_output_state_out_symm, &_output_state_out_f32); _output_state_out_symm.allocator()->allocate(); - _quantize.configure(&_output_state_out_f32, output_state_out); + _quantize.configure(compile_context, &_output_state_out_f32, output_state_out); _output_state_out_f32.allocator()->allocate(); } -- cgit v1.2.1