path: root/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
author    Manuel Bottini <manuel.bottini@arm.com> 2020-04-08 10:15:51 +0100
committer Manuel Bottini <manuel.bottini@arm.com> 2020-04-23 17:53:59 +0000
commit    2b84be544e4a27f7e8e80827e9c85c8f0d58b4ce (patch)
tree      078051a911f9b8883a3f11955cfd3b7ba0d7d9f3 /src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
parent    0de45d0a8009e19331c4e29d617fa183167c513a (diff)
download  ComputeLibrary-2b84be544e4a27f7e8e80827e9c85c8f0d58b4ce.tar.gz
COMPMID-3280: Make all ML primitives for CL use the new interface - Part 2
- CLFunctions have been updated

Change-Id: Ie3256a6c775bc12f3126482bd8e8a46da54b267c
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3053
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLLSTMLayerQuantized.cpp')
-rw-r--r-- src/runtime/CL/functions/CLLSTMLayerQuantized.cpp | 68
1 file changed, 40 insertions(+), 28 deletions(-)
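
A minimal caller-side sketch of the interface change described in the commit message above: the new configure() overload takes an explicit CLCompileContext, while the legacy overload forwards the default one obtained from CLKernelLibrary (as the first hunk below shows). The wrapper function name (configure_with_context) and the abbreviated weight-parameter names are illustrative only; tensor creation, shapes and quantization setup are elided.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h"

using namespace arm_compute;

// Illustrative helper: forwards an explicitly chosen compile context to the
// new CLLSTMLayerQuantized::configure() overload, mirroring what the legacy
// overload now does internally with the default context.
void configure_with_context(CLLSTMLayerQuantized &lstm,
                            const ICLTensor *input,
                            const ICLTensor *i2i_w, const ICLTensor *i2f_w, const ICLTensor *i2c_w, const ICLTensor *i2o_w,
                            const ICLTensor *r2i_w, const ICLTensor *r2f_w, const ICLTensor *r2c_w, const ICLTensor *r2o_w,
                            const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias,
                            const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                            ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                            ICLTensor *cell_state_out, ICLTensor *output_state_out)
{
    // The CL runtime must have been initialised beforehand (typically via
    // CLScheduler::get().default_init()), otherwise there is no context to hand out.
    const CLCompileContext &ctx = CLKernelLibrary::get().get_compile_context();

    // Same tensor arguments as before; only the leading compile context is new.
    lstm.configure(ctx, input,
                   i2i_w, i2f_w, i2c_w, i2o_w,
                   r2i_w, r2f_w, r2c_w, r2o_w,
                   input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias,
                   cell_state_in, output_state_in,
                   cell_state_out, output_state_out);
}

Existing callers that use the old signature keep working unchanged, since that overload now simply delegates to the context-taking one.
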
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index e5f127825b..c57fcc9f21 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,6 +62,18 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
ICLTensor *cell_state_in, const ICLTensor *output_state_in,
ICLTensor *cell_state_out, ICLTensor *output_state_out)
{
+ configure(CLKernelLibrary::get().get_compile_context(), input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
+ recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
+ output_state_out);
+}
+
+void CLLSTMLayerQuantized::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+ const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+ const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+ const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+ ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+ ICLTensor *cell_state_out, ICLTensor *output_state_out)
+{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
@@ -107,18 +119,18 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
_input_weights.allocator()->init(TensorInfo(TensorShape(input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
- _concat_input_weights.configure(inputs_weights_vector, &_input_weights, Window::DimY);
+ _concat_input_weights.configure(compile_context, inputs_weights_vector, &_input_weights, Window::DimY);
_recurrent_weights.allocator()->init(TensorInfo(TensorShape(output_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
- _concat_recurrent_weights.configure(recurrent_weights_vector, &_recurrent_weights, Window::DimY);
+ _concat_recurrent_weights.configure(compile_context, recurrent_weights_vector, &_recurrent_weights, Window::DimY);
std::vector<const ICLTensor *> weights_vector;
weights_vector.emplace_back(&_recurrent_weights);
weights_vector.emplace_back(&_input_weights);
_weights.allocator()->init(TensorInfo(TensorShape(output_size + input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
- _concat_weights.configure(weights_vector, &_weights, Window::DimX);
- _transpose_weights.configure(&_weights, &_weights_transposed);
+ _concat_weights.configure(compile_context, weights_vector, &_weights, Window::DimX);
+ _transpose_weights.configure(compile_context, &_weights, &_weights_transposed);
// Input concatenation
std::vector<const ICLTensor *> input_vector;
@@ -127,7 +139,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
_memory_group.manage(&_input);
_input.allocator()->init(TensorInfo(TensorShape(output_size + input_size, batch_size), 1, DataType::QASYMM8, qasymm));
- _concat_inputs.configure(input_vector, &_input, Window::DimX);
+ _concat_inputs.configure(compile_context, input_vector, &_input, Window::DimX);
// Bias concatenation
std::vector<const ICLTensor *> bias_vector;
@@ -137,7 +149,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
bias_vector.emplace_back(output_gate_bias);
_bias.allocator()->init(TensorInfo(TensorShape(4 * output_size), 1, DataType::S32));
- _concat_bias.configure(bias_vector, &_bias, Window::DimX);
+ _concat_bias.configure(compile_context, bias_vector, &_bias, Window::DimX);
// Invert the offset for gemmlowp
_input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, -qasymm.uniform().offset));
@@ -146,7 +158,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
// Run gemmlowp
_memory_group.manage(&_output_highp);
_output_highp.allocator()->init(TensorInfo(TensorShape(4 * output_size, batch_size), 1, DataType::S32));
- _gemmlowp.configure(&_input, &_weights_transposed, nullptr, &_output_highp);
+ _gemmlowp.configure(compile_context, &_input, &_weights_transposed, nullptr, &_output_highp);
_input.allocator()->allocate();
// Set the offset back
@@ -162,7 +174,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
_memory_group.manage(&_output_lowp);
- _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
+ _output_stage.configure(compile_context, &_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
_output_highp.allocator()->allocate();
_bias.allocator()->allocate();
@@ -170,86 +182,86 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
if(batch_size > 1)
{
_memory_group.manage(&_input_gate_input);
- _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
+ _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
_memory_group.manage(&_forget_gate_input);
- _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
+ _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
_memory_group.manage(&_input_modulation_gate_input);
- _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
+ _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
_memory_group.manage(&_output_gate_input);
- _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
+ _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
_output_lowp.allocator()->allocate();
}
else
{
_memory_group.manage(&_input_gate_input);
- _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0 }, { output_size });
+ _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0 }, { output_size });
_memory_group.manage(&_forget_gate_input);
- _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
+ _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
_memory_group.manage(&_input_modulation_gate_input);
- _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
+ _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
_memory_group.manage(&_output_gate_input);
- _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
+ _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
_output_lowp.allocator()->allocate();
}
// Forget gate
_memory_group.manage(&_forget_gate_output);
_forget_gate_output.allocator()->init(TensorInfo(_forget_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
- _sigmoid_forget_gate.configure(&_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ _sigmoid_forget_gate.configure(compile_context, &_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
_forget_gate_input.allocator()->allocate();
// Input gate
_memory_group.manage(&_input_gate_output);
_input_gate_output.allocator()->init(TensorInfo(_input_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
- _sigmoid_input_gate.configure(&_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ _sigmoid_input_gate.configure(compile_context, &_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
_input_gate_input.allocator()->allocate();
// Input modulation gate equation
_memory_group.manage(&_input_modulation_gate_output);
_input_modulation_gate_output.allocator()->init(TensorInfo(_input_modulation_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
- _tanh_modulation_gate.configure(&_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+ _tanh_modulation_gate.configure(compile_context, &_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
_input_modulation_gate_input.allocator()->allocate();
// Output gate
_memory_group.manage(&_output_gate_output);
_output_gate_output.allocator()->init(TensorInfo(_output_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
- _sigmoid_output_gate.configure(&_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ _sigmoid_output_gate.configure(compile_context, &_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
_output_gate_input.allocator()->allocate();
// Long term memory
_memory_group.manage(&_cell_state_tmp1);
_cell_state_tmp1.allocator()->init(TensorInfo(_forget_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
- _mul_forget_gate_cell_state.configure(&_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+ _mul_forget_gate_cell_state.configure(compile_context, &_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
_forget_gate_output.allocator()->allocate();
_memory_group.manage(&_cell_state_tmp2);
_cell_state_tmp2.allocator()->init(TensorInfo(_input_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
- _mul_input_gate_input_mod_gate.configure(&_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+ _mul_input_gate_input_mod_gate.configure(compile_context, &_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
_input_modulation_gate_output.allocator()->allocate();
_input_gate_output.allocator()->allocate();
- _add_cell_state_tmps.configure(&_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE);
+ _add_cell_state_tmps.configure(compile_context, &_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE);
_cell_state_tmp1.allocator()->allocate();
_cell_state_tmp2.allocator()->allocate();
// Short term memory
_memory_group.manage(&_output_state_tmp);
_output_state_tmp.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
- _tanh_output_state.configure(cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+ _tanh_output_state.configure(compile_context, cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
_memory_group.manage(&_output_state_out_symm);
_output_state_out_symm.allocator()->init(TensorInfo(_output_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
- _mul_output_state_tmp_output_gate.configure(&_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+ _mul_output_state_tmp_output_gate.configure(compile_context, &_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
_output_gate_output.allocator()->allocate();
_output_state_tmp.allocator()->allocate();
// Requantize the output state from QSYMM16 to QASYMM8
_memory_group.manage(&_output_state_out_f32);
_output_state_out_f32.allocator()->init(TensorInfo(_output_state_out_symm.info()->tensor_shape(), 1, DataType::F32));
- _dequantize.configure(&_output_state_out_symm, &_output_state_out_f32);
+ _dequantize.configure(compile_context, &_output_state_out_symm, &_output_state_out_f32);
_output_state_out_symm.allocator()->allocate();
- _quantize.configure(&_output_state_out_f32, output_state_out);
+ _quantize.configure(compile_context, &_output_state_out_f32, output_state_out);
_output_state_out_f32.allocator()->allocate();
}