62 const TensorShape& inputShape = inputInfo.GetShape();
63 const TensorShape& outputStateInShape = outputStateInInfo.GetShape();
64 const TensorShape& cellStateInShape = cellStateInInfo.GetShape();
67 const uint32_t numBatches = inputShape[0];
68 const uint32_t inputSize = inputShape[1];
69 const uint32_t outputSize = outputStateInShape[1];
70 const uint32_t numUnits = cellStateInShape[1];
79 std::unique_ptr<Decoder<float>> inputDecoder =
81 std::unique_ptr<Decoder<float>> outputStateInDecoder =
83 std::unique_ptr<Decoder<float>> cellStateInDecoder =
87 std::unique_ptr<Decoder<float>> outputStateOutDecoder =
89 std::unique_ptr<Decoder<float>> cellStateOutDecoder =
91 std::unique_ptr<Decoder<float>> outputDecoder =
95 std::unique_ptr<Encoder<float>> outputStateOutEncoder =
97 std::unique_ptr<Encoder<float>> cellStateOutEncoder =
99 std::unique_ptr<Encoder<float>> outputEncoder =
103 std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>(
104 m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetTensor<
void>());
105 std::unique_ptr<Decoder<float>> inputToCellWeightsDecoder = MakeDecoder<float>(
106 m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetTensor<
void>());
107 std::unique_ptr<Decoder<float>> inputToOutputWeightsDecoder = MakeDecoder<float>(
108 m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetTensor<
void>());
110 std::unique_ptr<Decoder<float>> recurrentToForgetWeightsDecoder = MakeDecoder<float>(
111 m_RecurrentToForgetWeightsTensor->GetTensorInfo(), m_RecurrentToForgetWeightsTensor->GetTensor<
void>());
112 std::unique_ptr<Decoder<float>> recurrentToCellWeightsDecoder = MakeDecoder<float>(
113 m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetTensor<
void>());
114 std::unique_ptr<Decoder<float>> recurrentToOutputWeightsDecoder = MakeDecoder<float>(
115 m_RecurrentToOutputWeightsTensor->GetTensorInfo(), m_RecurrentToOutputWeightsTensor->GetTensor<
void>());
118 std::unique_ptr<Decoder<float>> inputToInputWeightsDecoder;
119 std::unique_ptr<Decoder<float>> recurrentToInputWeightsDecoder;
120 std::unique_ptr<Decoder<float>> inputGateBiasDecoder;
123 std::unique_ptr<Decoder<float>> cellToInputWeightsDecoder;
124 std::unique_ptr<Decoder<float>> cellToForgetWeightsDecoder;
125 std::unique_ptr<Decoder<float>> cellToOutputWeightsDecoder;
128 std::unique_ptr<Decoder<float>> projectionWeightsDecoder;
129 std::unique_ptr<Decoder<float>> projectionBiasDecoder;
132 std::unique_ptr<Decoder<float>> inputLayerNormWeightsDecoder;
133 std::unique_ptr<Decoder<float>> forgetLayerNormWeightsDecoder;
134 std::unique_ptr<Decoder<float>> cellLayerNormWeightsDecoder;
135 std::unique_ptr<Decoder<float>> outputLayerNormWeightsDecoder;
138 std::unique_ptr<Decoder<float>> forgetGateBiasDecoder;
139 std::unique_ptr<Decoder<float>> cellGateBiasDecoder;
140 std::unique_ptr<Decoder<float>> outputGateBiasDecoder;
143 const uint32_t stateTensorSize = numBatches * numUnits;
144 std::vector<int16_t> inputGateData(stateTensorSize);
145 std::vector<int16_t> cellGateData(stateTensorSize);
146 std::vector<int16_t> forgetGateData(stateTensorSize);
147 std::vector<int16_t> outputGateData(stateTensorSize);
148 std::vector<int32_t> hiddenStateData(stateTensorSize);
164 std::unique_ptr<Decoder<float>> inputGateDecoder =
165 MakeDecoder<float>(inputGateInfo, inputGateData.data());
166 std::unique_ptr<Decoder<float>> cellGateDecoder =
167 MakeDecoder<float>(cellGateInfo, cellGateData.data());
168 std::unique_ptr<Decoder<float>> forgetGateDecoder =
169 MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
170 std::unique_ptr<Decoder<float>> outputGateDecoder =
171 MakeDecoder<float>(outputGateInfo, outputGateData.data());
172 std::unique_ptr<Decoder<float>> hiddenStateDecoder =
173 MakeDecoder<float>(hiddenStateInfo, hiddenStateData.data());
175 std::unique_ptr<Encoder<float>> inputGateEncoder =
176 MakeEncoder<float>(inputGateInfo, inputGateData.data());
177 std::unique_ptr<Encoder<float>> cellGateEncoder =
178 MakeEncoder<float>(cellGateInfo, cellGateData.data());
179 std::unique_ptr<Encoder<float>> forgetGateEncoder =
180 MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
181 std::unique_ptr<Encoder<float>> outputGateEncoder =
182 MakeEncoder<float>(outputGateInfo, outputGateData.data());
183 std::unique_ptr<Encoder<float>> hiddenStateEncoder =
184 MakeEncoder<float>(hiddenStateInfo, hiddenStateData.data());
189 inputToInputWeightsDecoder = MakeDecoder<float>(
190 m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetTensor<
void>());
191 recurrentToInputWeightsDecoder = MakeDecoder<float>(
192 m_RecurrentToInputWeightsTensor->GetTensorInfo(), m_RecurrentToInputWeightsTensor->GetTensor<
void>());
199 cellToInputWeightsDecoder = MakeDecoder<float>(
200 m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetTensor<
void>());
202 cellToForgetWeightsDecoder = MakeDecoder<float>(
203 m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetTensor<
void>());
204 cellToOutputWeightsDecoder = MakeDecoder<float>(
205 m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetTensor<
void>());
208 if (projectionEnabled)
210 projectionWeightsDecoder = MakeDecoder<float>(
211 m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetTensor<
void>());
212 if (m_ProjectionBiasTensor)
214 projectionBiasDecoder = MakeDecoder<float>(
215 m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetTensor<
void>());
219 if (layerNormEnabled)
223 inputLayerNormWeightsDecoder = MakeDecoder<float>(
224 m_InputLayerNormWeightsTensor->GetTensorInfo(), m_InputLayerNormWeightsTensor->GetTensor<
void>());
228 m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
229 inputGateBiasDecoder = MakeDecoder<float>(
230 inputGateBiasTensorInfo, m_InputGateBiasTensor->GetTensor<
void>());
233 forgetLayerNormWeightsDecoder = MakeDecoder<float>(
234 m_ForgetLayerNormWeightsTensor->GetTensorInfo(), m_ForgetLayerNormWeightsTensor->GetTensor<
void>());
235 cellLayerNormWeightsDecoder = MakeDecoder<float>(
236 m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetTensor<
void>());
237 outputLayerNormWeightsDecoder = MakeDecoder<float>(
238 m_OutputLayerNormWeightsTensor->GetTensorInfo(), m_OutputLayerNormWeightsTensor->GetTensor<
void>());
242 m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
243 forgetGateBiasDecoder = MakeDecoder<float>(
244 forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetTensor<
void>());
247 m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
248 cellGateBiasDecoder = MakeDecoder<float>(
249 cellGateBiasTensorInfo, m_CellBiasTensor->GetTensor<
void>());
252 m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
253 outputGateBiasDecoder = MakeDecoder<float>(
254 outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetTensor<
void>());
260 ZeroVector(*inputGateEncoder, stateTensorSize);
262 ZeroVector(*forgetGateEncoder, stateTensorSize);
263 ZeroVector(*cellGateEncoder, stateTensorSize);
264 ZeroVector(*outputGateEncoder, stateTensorSize);
265 ZeroVector(*hiddenStateEncoder, stateTensorSize);
271 numUnits, inputSize, *inputDecoder, numBatches, *inputGateEncoder);
275 numUnits, inputSize, *inputDecoder, numBatches, *forgetGateEncoder);
278 numUnits, inputSize, *inputDecoder, numBatches, *cellGateEncoder);
281 numUnits, inputSize, *inputDecoder, numBatches, *outputGateEncoder);
287 numUnits, outputSize, *outputStateInDecoder, numBatches, *inputGateEncoder);
291 numUnits, outputSize, *outputStateInDecoder, numBatches, *forgetGateEncoder);
294 numUnits, outputSize, *outputStateInDecoder, numBatches, *cellGateEncoder);
297 numUnits, outputSize, *outputStateInDecoder, numBatches, *outputGateEncoder);
305 numUnits, *cellStateInDecoder, numBatches, *inputGateEncoder);
308 if (layerNormEnabled)
310 inputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
311 m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
313 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
316 *inputGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
318 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
321 numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
323 inputGateInfo.SetQuantizationScale(1.f / 4096);
324 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
327 numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
329 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
332 inputGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
333 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
336 Activation(*inputGateDecoder, *inputGateEncoder,
337 TensorInfo({numUnits, numBatches}, internalType),
340 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
347 *cellStateInDecoder, numBatches, *forgetGateEncoder);
350 if (layerNormEnabled)
353 forgetGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
354 m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
356 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
361 *forgetGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
364 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
367 numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
371 forgetGateInfo.SetQuantizationScale(1.f / 4096);
372 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
375 numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
378 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
381 forgetGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
382 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
385 Activation(*forgetGateDecoder, *forgetGateEncoder,
386 TensorInfo({numUnits, numBatches}, internalType),
389 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
392 if (layerNormEnabled)
394 cellGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
395 m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
397 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
401 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
404 numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
406 cellGateInfo.SetQuantizationScale(1.f / 4096);
407 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
410 numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
412 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
415 cellGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
416 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
419 Activation(*cellGateDecoder, *cellGateEncoder,
420 TensorInfo({numUnits, numBatches}, internalType),
423 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
429 Sub1Vector(*forgetGateDecoder, stateTensorSize, *forgetGateEncoder);
431 *cellGateDecoder, *forgetGateDecoder, stateTensorSize, *cellStateOutEncoder);
436 *cellGateDecoder, *inputGateDecoder, stateTensorSize, *cellStateOutEncoder);
449 numUnits, *cellStateOutDecoder, numBatches, *outputGateEncoder);
452 if (layerNormEnabled)
454 outputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
455 m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
457 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
461 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
464 numBatches, *outputGateEncoder);
466 outputGateInfo.SetQuantizationScale(1.f / 4096);
467 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
469 VectorBatchVectorAdd(*outputGateBiasDecoder, numUnits, *outputGateDecoder, numBatches, *outputGateEncoder);
471 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
474 outputGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
475 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
478 Activation(*outputGateDecoder, *outputGateEncoder,
479 TensorInfo({numUnits, numBatches}, internalType),
482 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
485 Activation(*cellStateOutDecoder, *cellGateEncoder,
486 TensorInfo({numUnits, numBatches}, internalType),
495 if (m_ProjectionBiasTensor)
498 outputSize, numBatches, *outputEncoder);
502 outputSize, numUnits, *hiddenStateDecoder, numBatches, *outputEncoder);
512 CopyVector(*hiddenStateDecoder, numBatches * outputSize, *outputEncoder);
516 CopyVector(*outputDecoder, numBatches * outputSize, *outputStateOutEncoder);
void MeanStddevNormalization(armnn::Decoder< float > &input_vector, armnn::Encoder< float > &output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon)
void VectorBatchVectorAdd(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
void ClipVector(armnn::Decoder< float > &vector, uint32_t vSize, float absLimit, armnn::Encoder< float > &outResult)
bool m_PeepholeEnabled
Enable/disable peephole.
void Sub1Vector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &result)
float m_HiddenStateScale
Hidden State quantization scale.
const QLstmQueueDescriptor m_Data
float m_OutputIntermediateScale
Output intermediate quantization scale.
void CopyVector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &outResult)
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
void ZeroVector(armnn::Encoder< float > &vector, uint32_t vSize)
void VectorVectorCwiseProduct(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
LayerDescriptor m_Parameters
void VectorBatchVectorCwiseProduct(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder< float > &matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder< float > &vector, uint32_t nBatch, armnn::Encoder< float > &outResult)
bool m_LayerNormEnabled
Enable/disable layer normalization.
float m_ProjectionClip
Clipping threshold value for the projection.
float m_InputIntermediateScale
Input intermediate quantization scale.
void VectorVectorCwiseProductAccumulate(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
void VectorBatchVectorAssign(armnn::Decoder< float > &vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder< float > &outBatchVector)
float m_ForgetIntermediateScale
Forget intermediate quantization scale.
float m_CellClip
Clipping threshold value for the cell state.
std::vector< ITensorHandle * > m_Outputs
bool m_ProjectionEnabled
Enable/disable the projection layer.
std::vector< ITensorHandle * > m_Inputs
float m_CellIntermediateScale
Cell intermediate quantization scale.
bool m_CifgEnabled
Enable/disable CIFG (coupled input & forget gate).
int32_t m_HiddenStateZeroPoint
Hidden State zero point.