79 const uint32_t numBatches = inputShape[0];
80 const uint32_t inputSize = inputShape[1];
81 const uint32_t outputSize = outputStateInShape[1];
82 const uint32_t numUnits = cellStateInShape[1];
91 std::unique_ptr<Decoder<float>> inputDecoder =
92 MakeDecoder<float>(inputInfo, inputs[0]->Map());
93 std::unique_ptr<Decoder<float>> outputStateInDecoder =
94 MakeDecoder<float>(outputStateInInfo, inputs[1]->Map());
95 std::unique_ptr<Decoder<float>> cellStateInDecoder =
96 MakeDecoder<float>(cellStateInInfo, inputs[2]->Map());
99 std::unique_ptr<Decoder<float>> outputStateOutDecoder =
100 MakeDecoder<float>(outputStateOutInfo, outputs[0]->Map());
101 std::unique_ptr<Decoder<float>> cellStateOutDecoder =
102 MakeDecoder<float>(cellStateOutInfo, outputs[1]->Map());
103 std::unique_ptr<Decoder<float>> outputDecoder =
104 MakeDecoder<float>(outputInfo, outputs[2]->Map());
107 std::unique_ptr<Encoder<float>> outputStateOutEncoder =
108 MakeEncoder<float>(outputStateOutInfo, outputs[0]->Map());
109 std::unique_ptr<Encoder<float>> cellStateOutEncoder =
110 MakeEncoder<float>(cellStateOutInfo, outputs[1]->Map());
111 std::unique_ptr<Encoder<float>> outputEncoder =
112 MakeEncoder<float>(outputInfo, outputs[2]->Map());
115 std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>(
116 m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetConstTensor<
void>());
117 std::unique_ptr<Decoder<float>> inputToCellWeightsDecoder = MakeDecoder<float>(
118 m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetConstTensor<
void>());
119 std::unique_ptr<Decoder<float>> inputToOutputWeightsDecoder = MakeDecoder<float>(
120 m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetConstTensor<
void>());
122 std::unique_ptr<Decoder<float>> recurrentToForgetWeightsDecoder = MakeDecoder<float>(
123 m_RecurrentToForgetWeightsTensor->GetTensorInfo(),
124 m_RecurrentToForgetWeightsTensor->GetConstTensor<
void>());
125 std::unique_ptr<Decoder<float>> recurrentToCellWeightsDecoder = MakeDecoder<float>(
126 m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetConstTensor<
void>());
127 std::unique_ptr<Decoder<float>> recurrentToOutputWeightsDecoder = MakeDecoder<float>(
128 m_RecurrentToOutputWeightsTensor->GetTensorInfo(),
129 m_RecurrentToOutputWeightsTensor->GetConstTensor<
void>());
132 std::unique_ptr<Decoder<float>> inputToInputWeightsDecoder;
133 std::unique_ptr<Decoder<float>> recurrentToInputWeightsDecoder;
134 std::unique_ptr<Decoder<float>> inputGateBiasDecoder;
137 std::unique_ptr<Decoder<float>> cellToInputWeightsDecoder;
138 std::unique_ptr<Decoder<float>> cellToForgetWeightsDecoder;
139 std::unique_ptr<Decoder<float>> cellToOutputWeightsDecoder;
142 std::unique_ptr<Decoder<float>> projectionWeightsDecoder;
143 std::unique_ptr<Decoder<float>> projectionBiasDecoder;
146 std::unique_ptr<Decoder<float>> inputLayerNormWeightsDecoder;
147 std::unique_ptr<Decoder<float>> forgetLayerNormWeightsDecoder;
148 std::unique_ptr<Decoder<float>> cellLayerNormWeightsDecoder;
149 std::unique_ptr<Decoder<float>> outputLayerNormWeightsDecoder;
152 std::unique_ptr<Decoder<float>> forgetGateBiasDecoder;
153 std::unique_ptr<Decoder<float>> cellGateBiasDecoder;
154 std::unique_ptr<Decoder<float>> outputGateBiasDecoder;
157 const uint32_t stateTensorSize = numBatches * numUnits;
158 std::vector<int16_t> inputGateData(stateTensorSize);
159 std::vector<int16_t> cellGateData(stateTensorSize);
160 std::vector<int16_t> forgetGateData(stateTensorSize);
161 std::vector<int16_t> outputGateData(stateTensorSize);
162 std::vector<int32_t> hiddenStateData(stateTensorSize);
163 std::vector<int16_t> outputInt16Data(numBatches * outputSize);
179 outputInfo.GetQuantizationScale(),
180 outputInfo.GetQuantizationOffset());
183 std::unique_ptr<Decoder<float>> inputGateDecoder =
184 MakeDecoder<float>(inputGateInfo, inputGateData.data());
185 std::unique_ptr<Decoder<float>> cellGateDecoder =
186 MakeDecoder<float>(cellGateInfo, cellGateData.data());
187 std::unique_ptr<Decoder<float>> forgetGateDecoder =
188 MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
189 std::unique_ptr<Decoder<float>> outputGateDecoder =
190 MakeDecoder<float>(outputGateInfo, outputGateData.data());
191 std::unique_ptr<Decoder<float>> hiddenStateDecoder =
192 MakeDecoder<float>(hiddenStateInfo, hiddenStateData.data());
194 std::unique_ptr<Encoder<float>> inputGateEncoder =
195 MakeEncoder<float>(inputGateInfo, inputGateData.data());
196 std::unique_ptr<Encoder<float>> cellGateEncoder =
197 MakeEncoder<float>(cellGateInfo, cellGateData.data());
198 std::unique_ptr<Encoder<float>> forgetGateEncoder =
199 MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
200 std::unique_ptr<Encoder<float>> outputGateEncoder =
201 MakeEncoder<float>(outputGateInfo, outputGateData.data());
202 std::unique_ptr<Encoder<float>> hiddenStateEncoder =
203 MakeEncoder<float>(hiddenStateInfo, hiddenStateData.data());
206 std::unique_ptr<Decoder<float>> outputInt16Decoder =
207 MakeDecoder<float>(outputInt16Info, outputInt16Data.data());
208 std::unique_ptr<Encoder<float>> outputInt16Encoder =
209 MakeEncoder<float>(outputInt16Info, outputInt16Data.data());
214 inputToInputWeightsDecoder = MakeDecoder<float>(
215 m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetConstTensor<
void>());
216 recurrentToInputWeightsDecoder = MakeDecoder<float>(m_RecurrentToInputWeightsTensor->GetTensorInfo(),
217 m_RecurrentToInputWeightsTensor->GetConstTensor<
void>());
224 cellToInputWeightsDecoder = MakeDecoder<float>(
225 m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetConstTensor<
void>());
227 cellToForgetWeightsDecoder = MakeDecoder<float>(
228 m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetConstTensor<
void>());
229 cellToOutputWeightsDecoder = MakeDecoder<float>(
230 m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetConstTensor<
void>());
233 if (projectionEnabled)
235 projectionWeightsDecoder = MakeDecoder<float>(
236 m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetConstTensor<
void>());
237 if (m_ProjectionBiasTensor)
239 projectionBiasDecoder = MakeDecoder<float>(
240 m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetConstTensor<
void>());
244 if (layerNormEnabled)
248 inputLayerNormWeightsDecoder = MakeDecoder<float>(m_InputLayerNormWeightsTensor->GetTensorInfo(),
249 m_InputLayerNormWeightsTensor->GetConstTensor<
void>());
253 m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
254 inputGateBiasDecoder = MakeDecoder<float>(
255 inputGateBiasTensorInfo, m_InputGateBiasTensor->GetConstTensor<
void>());
258 forgetLayerNormWeightsDecoder = MakeDecoder<float>(
259 m_ForgetLayerNormWeightsTensor->GetTensorInfo(),
260 m_ForgetLayerNormWeightsTensor->GetConstTensor<
void>());
261 cellLayerNormWeightsDecoder = MakeDecoder<float>(
262 m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetConstTensor<
void>());
263 outputLayerNormWeightsDecoder = MakeDecoder<float>(
264 m_OutputLayerNormWeightsTensor->GetTensorInfo(),
265 m_OutputLayerNormWeightsTensor->GetConstTensor<
void>());
269 m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
270 forgetGateBiasDecoder = MakeDecoder<float>(
271 forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetConstTensor<
void>());
274 m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
275 cellGateBiasDecoder = MakeDecoder<float>(
276 cellGateBiasTensorInfo, m_CellBiasTensor->GetConstTensor<
void>());
279 m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
280 outputGateBiasDecoder = MakeDecoder<float>(
281 outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetConstTensor<
void>());
287 ZeroVector(*inputGateEncoder, stateTensorSize);
289 ZeroVector(*forgetGateEncoder, stateTensorSize);
290 ZeroVector(*cellGateEncoder, stateTensorSize);
291 ZeroVector(*outputGateEncoder, stateTensorSize);
292 ZeroVector(*hiddenStateEncoder, stateTensorSize);
298 numUnits, inputSize, *inputDecoder, numBatches, *inputGateEncoder);
302 numUnits, inputSize, *inputDecoder, numBatches, *forgetGateEncoder);
305 numUnits, inputSize, *inputDecoder, numBatches, *cellGateEncoder);
308 numUnits, inputSize, *inputDecoder, numBatches, *outputGateEncoder);
314 numUnits, outputSize, *outputStateInDecoder, numBatches, *inputGateEncoder);
318 numUnits, outputSize, *outputStateInDecoder, numBatches, *forgetGateEncoder);
321 numUnits, outputSize, *outputStateInDecoder, numBatches, *cellGateEncoder);
324 numUnits, outputSize, *outputStateInDecoder, numBatches, *outputGateEncoder);
332 numUnits, *cellStateInDecoder, numBatches, *inputGateEncoder);
335 if (layerNormEnabled)
337 inputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
338 m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
340 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
343 *inputGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
345 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
348 numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
350 inputGateInfo.SetQuantizationScale(1.f / 4096);
351 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
354 numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
356 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
359 inputGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
360 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
363 Activation(*inputGateDecoder, *inputGateEncoder,
364 TensorInfo({numUnits, numBatches}, internalType),
367 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
374 *cellStateInDecoder, numBatches, *forgetGateEncoder);
377 if (layerNormEnabled)
380 forgetGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
381 m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
383 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
388 *forgetGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
391 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
394 numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
398 forgetGateInfo.SetQuantizationScale(1.f / 4096);
399 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
402 numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
405 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
408 forgetGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
409 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
412 Activation(*forgetGateDecoder, *forgetGateEncoder,
413 TensorInfo({numUnits, numBatches}, internalType),
416 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
419 if (layerNormEnabled)
421 cellGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
422 m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
424 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
428 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
431 numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
433 cellGateInfo.SetQuantizationScale(1.f / 4096);
434 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
437 numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
439 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
442 cellGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
443 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
446 Activation(*cellGateDecoder, *cellGateEncoder,
447 TensorInfo({numUnits, numBatches}, internalType),
450 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
456 Sub1Vector(*forgetGateDecoder, stateTensorSize, *forgetGateEncoder);
458 *cellGateDecoder, *forgetGateDecoder, stateTensorSize, *cellStateOutEncoder);
463 *cellGateDecoder, *inputGateDecoder, stateTensorSize, *cellStateOutEncoder);
476 numUnits, *cellStateOutDecoder, numBatches, *outputGateEncoder);
479 if (layerNormEnabled)
481 outputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
482 m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
484 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
488 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
491 numBatches, *outputGateEncoder);
493 outputGateInfo.SetQuantizationScale(1.f / 4096);
494 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
496 VectorBatchVectorAdd(*outputGateBiasDecoder, numUnits, *outputGateDecoder, numBatches, *outputGateEncoder);
498 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
501 outputGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
502 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
505 Activation(*outputGateDecoder, *outputGateEncoder,
506 TensorInfo({numUnits, numBatches}, internalType),
509 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
512 Activation(*cellStateOutDecoder, *cellGateEncoder,
513 TensorInfo({numUnits, numBatches}, internalType),
522 if (m_ProjectionBiasTensor)
528 numBatches, *outputInt16Encoder);
530 CopyVector(*outputInt16Decoder, numBatches * outputSize, *outputEncoder);
540 CopyVector(*hiddenStateDecoder, numBatches * outputSize, *outputEncoder);
544 CopyVector(*outputDecoder, numBatches * outputSize, *outputStateOutEncoder);
void MeanStddevNormalization(armnn::Decoder< float > &input_vector, armnn::Encoder< float > &output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon)
void VectorBatchVectorAdd(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
const TensorShape & GetShape() const
void Execute() const override
void ClipVector(armnn::Decoder< float > &vector, uint32_t vSize, float absLimit, armnn::Encoder< float > &outResult)
bool m_PeepholeEnabled
Enable/disable peephole.
void Sub1Vector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &result)
float m_HiddenStateScale
Hidden State quantization scale.
float m_OutputIntermediateScale
Output intermediate quantization scale.
void CopyVector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &outResult)
std::unique_ptr< armnn::ScopedTensorHandle > AssignScopedTensorHandle(const armnn::ConstTensorHandle *ptr)
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
void ZeroVector(armnn::Encoder< float > &vector, uint32_t vSize)
Copyright (c) 2021 ARM Limited and Contributors.
void VectorVectorCwiseProduct(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
LayerDescriptor m_Parameters
void VectorBatchVectorCwiseProduct(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
std::vector< ITensorHandle * > m_Inputs
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder< float > &matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder< float > &vector, uint32_t nBatch, armnn::Encoder< float > &outResult)
bool m_LayerNormEnabled
Enable/disable layer normalization.
RefQLstmWorkload(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info)
QLstmQueueDescriptor m_Data
float m_ProjectionClip
Clipping threshold value for the projection.
float m_InputIntermediateScale
Input intermediate quantization scale.
void VectorVectorCwiseProductAccumulate(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
void VectorBatchVectorAssign(armnn::Decoder< float > &vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder< float > &outBatchVector)
float m_ForgetIntermediateScale
Forget intermediate quantization scale.
float m_CellClip
Clipping threshold value for the cell state.
void ExecuteAsync(ExecutionData &executionData) override
std::vector< ITensorHandle * > m_Outputs
bool m_ProjectionEnabled
Enable/disable the projection layer.
std::vector< ITensorHandle * > m_Outputs
Contains information about TensorInfos of a layer.
std::vector< ITensorHandle * > m_Inputs
float m_CellIntermediateScale
Cell intermediate quantization scale.
bool m_CifgEnabled
Enable/disable CIFG (coupled input & forget gate).
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
int32_t m_HiddenStateZeroPoint
Hidden State zero point.