// -----------------------------------------------------------------------------
// Fragment of the reference (CPU) QLSTM workload execution, RefQLstmWorkload.
// NOTE(review): this is an extracted code listing — each line carries its
// original file line number, and there are gaps between lines, so several
// statements (including most helper-call names and some if-guards) are only
// partially visible here. Comments below describe only what the visible
// lines establish; anything inferred is marked as an assumption.
// -----------------------------------------------------------------------------

// Tensor dimensions taken from the input and state shapes: dimension 0 is the
// batch count, dimension 1 the per-batch feature size.
78 const uint32_t numBatches = inputShape[0];
79 const uint32_t inputSize = inputShape[1];
80 const uint32_t outputSize = outputStateInShape[1];
81 const uint32_t numUnits = cellStateInShape[1];

// Decoders (dequantizers) for the three workload inputs:
// input, previous output state, previous cell state.
90 std::unique_ptr<Decoder<float>> inputDecoder =
91 MakeDecoder<float>(inputInfo, inputs[0]->Map());
92 std::unique_ptr<Decoder<float>> outputStateInDecoder =
93 MakeDecoder<float>(outputStateInInfo, inputs[1]->Map());
94 std::unique_ptr<Decoder<float>> cellStateInDecoder =
95 MakeDecoder<float>(cellStateInInfo, inputs[2]->Map());

// Decoders over the three outputs as well: values written into the outputs
// earlier in this pass are read back later (e.g. cellStateOut is re-read
// below for the output-gate contribution and the cell-state activation).
98 std::unique_ptr<Decoder<float>> outputStateOutDecoder =
99 MakeDecoder<float>(outputStateOutInfo, outputs[0]->Map());
100 std::unique_ptr<Decoder<float>> cellStateOutDecoder =
101 MakeDecoder<float>(cellStateOutInfo, outputs[1]->Map());
102 std::unique_ptr<Decoder<float>> outputDecoder =
103 MakeDecoder<float>(outputInfo, outputs[2]->Map());

// Encoders (quantizers) for writing the three outputs.
106 std::unique_ptr<Encoder<float>> outputStateOutEncoder =
107 MakeEncoder<float>(outputStateOutInfo, outputs[0]->Map());
108 std::unique_ptr<Encoder<float>> cellStateOutEncoder =
109 MakeEncoder<float>(cellStateOutInfo, outputs[1]->Map());
110 std::unique_ptr<Encoder<float>> outputEncoder =
111 MakeEncoder<float>(outputInfo, outputs[2]->Map());

// Mandatory input-to-gate weight decoders (forget/cell/output gates).
// GetConstTensor<void>() exposes the raw quantized weight data; MakeDecoder
// picks the appropriate dequantizing decoder from each TensorInfo.
114 std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>(
115 m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetConstTensor<
void>());
116 std::unique_ptr<Decoder<float>> inputToCellWeightsDecoder = MakeDecoder<float>(
117 m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetConstTensor<
void>());
118 std::unique_ptr<Decoder<float>> inputToOutputWeightsDecoder = MakeDecoder<float>(
119 m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetConstTensor<
void>());

// Mandatory recurrent-to-gate weight decoders.
121 std::unique_ptr<Decoder<float>> recurrentToForgetWeightsDecoder = MakeDecoder<float>(
122 m_RecurrentToForgetWeightsTensor->GetTensorInfo(),
123 m_RecurrentToForgetWeightsTensor->GetConstTensor<
void>());
124 std::unique_ptr<Decoder<float>> recurrentToCellWeightsDecoder = MakeDecoder<float>(
125 m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetConstTensor<
void>());
126 std::unique_ptr<Decoder<float>> recurrentToOutputWeightsDecoder = MakeDecoder<float>(
127 m_RecurrentToOutputWeightsTensor->GetTensorInfo(),
128 m_RecurrentToOutputWeightsTensor->GetConstTensor<
void>());

// Optional decoders, left null here and populated further below only when the
// corresponding feature is enabled:
// - input-gate path (used when CIFG is disabled),
131 std::unique_ptr<Decoder<float>> inputToInputWeightsDecoder;
132 std::unique_ptr<Decoder<float>> recurrentToInputWeightsDecoder;
133 std::unique_ptr<Decoder<float>> inputGateBiasDecoder;
// - peephole cell-to-gate weights,
136 std::unique_ptr<Decoder<float>> cellToInputWeightsDecoder;
137 std::unique_ptr<Decoder<float>> cellToForgetWeightsDecoder;
138 std::unique_ptr<Decoder<float>> cellToOutputWeightsDecoder;
// - projection layer weights/bias,
141 std::unique_ptr<Decoder<float>> projectionWeightsDecoder;
142 std::unique_ptr<Decoder<float>> projectionBiasDecoder;
// - layer-normalization weights,
145 std::unique_ptr<Decoder<float>> inputLayerNormWeightsDecoder;
146 std::unique_ptr<Decoder<float>> forgetLayerNormWeightsDecoder;
147 std::unique_ptr<Decoder<float>> cellLayerNormWeightsDecoder;
148 std::unique_ptr<Decoder<float>> outputLayerNormWeightsDecoder;
// - gate biases (set up in the layer-norm branch below; whether they are also
//   set elsewhere is not visible in this extract).
151 std::unique_ptr<Decoder<float>> forgetGateBiasDecoder;
152 std::unique_ptr<Decoder<float>> cellGateBiasDecoder;
153 std::unique_ptr<Decoder<float>> outputGateBiasDecoder;

// Intermediate scratch buffers: one int16 buffer per gate plus an int32
// hidden-state buffer, each numBatches * numUnits elements, and an int16
// staging buffer for the (projected) output, numBatches * outputSize.
156 const uint32_t stateTensorSize = numBatches * numUnits;
157 std::vector<int16_t> inputGateData(stateTensorSize);
158 std::vector<int16_t> cellGateData(stateTensorSize);
159 std::vector<int16_t> forgetGateData(stateTensorSize);
160 std::vector<int16_t> outputGateData(stateTensorSize);
161 std::vector<int32_t> hiddenStateData(stateTensorSize);
162 std::vector<int16_t> outputInt16Data(numBatches * outputSize);

// Tail of a TensorInfo construction (its opening lines are missing from this
// extract) — it finishes with the output tensor's quantization scale/offset,
// presumably defining outputInt16Info used below; confirm against full source.
178 outputInfo.GetQuantizationScale(),
179 outputInfo.GetQuantizationOffset());

// Decoders over the gate / hidden-state scratch buffers.
182 std::unique_ptr<Decoder<float>> inputGateDecoder =
183 MakeDecoder<float>(inputGateInfo, inputGateData.data());
184 std::unique_ptr<Decoder<float>> cellGateDecoder =
185 MakeDecoder<float>(cellGateInfo, cellGateData.data());
186 std::unique_ptr<Decoder<float>> forgetGateDecoder =
187 MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
188 std::unique_ptr<Decoder<float>> outputGateDecoder =
189 MakeDecoder<float>(outputGateInfo, outputGateData.data());
190 std::unique_ptr<Decoder<float>> hiddenStateDecoder =
191 MakeDecoder<float>(hiddenStateInfo, hiddenStateData.data());

// Matching encoders over the same scratch buffers (decode/encode pairs share
// storage; the encoders are rebuilt below whenever a gate's scale changes).
193 std::unique_ptr<Encoder<float>> inputGateEncoder =
194 MakeEncoder<float>(inputGateInfo, inputGateData.data());
195 std::unique_ptr<Encoder<float>> cellGateEncoder =
196 MakeEncoder<float>(cellGateInfo, cellGateData.data());
197 std::unique_ptr<Encoder<float>> forgetGateEncoder =
198 MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
199 std::unique_ptr<Encoder<float>> outputGateEncoder =
200 MakeEncoder<float>(outputGateInfo, outputGateData.data());
201 std::unique_ptr<Encoder<float>> hiddenStateEncoder =
202 MakeEncoder<float>(hiddenStateInfo, hiddenStateData.data());

// Decoder/encoder pair over the int16 output staging buffer.
205 std::unique_ptr<Decoder<float>> outputInt16Decoder =
206 MakeDecoder<float>(outputInt16Info, outputInt16Data.data());
207 std::unique_ptr<Encoder<float>> outputInt16Encoder =
208 MakeEncoder<float>(outputInt16Info, outputInt16Data.data());

// Input-gate weight decoders — the guarding condition is on a missing line,
// but per m_CifgEnabled semantics this presumably runs when CIFG is disabled.
213 inputToInputWeightsDecoder = MakeDecoder<float>(
214 m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetConstTensor<
void>());
215 recurrentToInputWeightsDecoder = MakeDecoder<float>(m_RecurrentToInputWeightsTensor->GetTensorInfo(),
216 m_RecurrentToInputWeightsTensor->GetConstTensor<
void>());

// Peephole cell-to-gate weight decoders (guard on a missing line; the
// cell-to-input decoder is likely further guarded by CIFG — confirm).
223 cellToInputWeightsDecoder = MakeDecoder<float>(
224 m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetConstTensor<
void>());
226 cellToForgetWeightsDecoder = MakeDecoder<float>(
227 m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetConstTensor<
void>());
228 cellToOutputWeightsDecoder = MakeDecoder<float>(
229 m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetConstTensor<
void>());

// Projection weights, plus the optional projection bias when present.
232 if (projectionEnabled)
234 projectionWeightsDecoder = MakeDecoder<float>(
235 m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetConstTensor<
void>());
236 if (m_ProjectionBiasTensor)
238 projectionBiasDecoder = MakeDecoder<float>(
239 m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetConstTensor<
void>());

// Layer normalization: decoders for the per-gate layer-norm weights, and for
// the gate biases using synthetic TensorInfos whose quantization scale is
// (layer-norm weight scale / 1024) with zero point 0 — the start of each
// such TensorInfo construction is on a missing line.
243 if (layerNormEnabled)
247 inputLayerNormWeightsDecoder = MakeDecoder<float>(m_InputLayerNormWeightsTensor->GetTensorInfo(),
248 m_InputLayerNormWeightsTensor->GetConstTensor<
void>());
252 m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
253 inputGateBiasDecoder = MakeDecoder<float>(
254 inputGateBiasTensorInfo, m_InputGateBiasTensor->GetConstTensor<
void>());
257 forgetLayerNormWeightsDecoder = MakeDecoder<float>(
258 m_ForgetLayerNormWeightsTensor->GetTensorInfo(),
259 m_ForgetLayerNormWeightsTensor->GetConstTensor<
void>());
260 cellLayerNormWeightsDecoder = MakeDecoder<float>(
261 m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetConstTensor<
void>());
262 outputLayerNormWeightsDecoder = MakeDecoder<float>(
263 m_OutputLayerNormWeightsTensor->GetTensorInfo(),
264 m_OutputLayerNormWeightsTensor->GetConstTensor<
void>());
268 m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
269 forgetGateBiasDecoder = MakeDecoder<float>(
270 forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetConstTensor<
void>());
273 m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
274 cellGateBiasDecoder = MakeDecoder<float>(
275 cellGateBiasTensorInfo, m_CellBiasTensor->GetConstTensor<
void>());
278 m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
279 outputGateBiasDecoder = MakeDecoder<float>(
280 outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetConstTensor<
void>());

// Clear all gate and hidden-state scratch buffers before accumulation.
286 ZeroVector(*inputGateEncoder, stateTensorSize);
288 ZeroVector(*forgetGateEncoder, stateTensorSize);
289 ZeroVector(*cellGateEncoder, stateTensorSize);
290 ZeroVector(*outputGateEncoder, stateTensorSize);
291 ZeroVector(*hiddenStateEncoder, stateTensorSize);

// Input contribution to each gate. These are argument tails only — the call
// names are on missing lines, but the argument shape (rows=numUnits,
// cols=inputSize, vector, nBatch, out) matches
// MatrixBatchVectorMultiplyAccumulate; confirm against the full source.
297 numUnits, inputSize, *inputDecoder, numBatches, *inputGateEncoder);
301 numUnits, inputSize, *inputDecoder, numBatches, *forgetGateEncoder);
304 numUnits, inputSize, *inputDecoder, numBatches, *cellGateEncoder);
307 numUnits, inputSize, *inputDecoder, numBatches, *outputGateEncoder);

// Recurrent (previous output state) contribution to each gate — same helper
// shape, with cols=outputSize.
313 numUnits, outputSize, *outputStateInDecoder, numBatches, *inputGateEncoder);
317 numUnits, outputSize, *outputStateInDecoder, numBatches, *forgetGateEncoder);
320 numUnits, outputSize, *outputStateInDecoder, numBatches, *cellGateEncoder);
323 numUnits, outputSize, *outputStateInDecoder, numBatches, *outputGateEncoder);

// Peephole contribution of the previous cell state into the input gate
// (argument tail; guard and call name on missing lines).
331 numUnits, *cellStateInDecoder, numBatches, *inputGateEncoder);

// ---- Input gate finalization ----
334 if (layerNormEnabled)
// Re-scale the gate's quantization for normalization (third scale factor is
// on a missing line) and rebuild the encoder to match.
336 inputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
337 m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
339 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
// Tail of MeanStddevNormalization (per the trailing m_LayerNormEpsilon arg).
342 *inputGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
344 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
// Tail of an elementwise product with the layer-norm weights (call name on a
// missing line).
347 numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
// Bias add at fixed scale 1/4096 (matches the fixed-point layer-norm scheme
// used for the other gates below).
349 inputGateInfo.SetQuantizationScale(1.f / 4096);
350 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
353 numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
355 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
// Gate activation written at the cell-state-out scale; the activation
// function argument is on a missing line (presumably sigmoid for a gate —
// confirm).
358 inputGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
359 inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
362 Activation(*inputGateDecoder, *inputGateEncoder,
363 TensorInfo({numUnits, numBatches}, internalType),
366 inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());

// Peephole contribution of the previous cell state into the forget gate
// (argument tail).
373 *cellStateInDecoder, numBatches, *forgetGateEncoder);

// ---- Forget gate finalization (same pattern as the input gate) ----
376 if (layerNormEnabled)
379 forgetGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
380 m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
382 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
387 *forgetGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
390 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
393 numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
397 forgetGateInfo.SetQuantizationScale(1.f / 4096);
398 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
401 numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
404 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
407 forgetGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
408 forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
411 Activation(*forgetGateDecoder, *forgetGateEncoder,
412 TensorInfo({numUnits, numBatches}, internalType),
415 forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());

// ---- Cell (modulation) gate finalization (same pattern; its activation is
// presumably tanh rather than sigmoid — confirm against full source) ----
418 if (layerNormEnabled)
420 cellGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
421 m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
423 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
427 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
430 numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
432 cellGateInfo.SetQuantizationScale(1.f / 4096);
433 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
436 numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
438 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
441 cellGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
442 cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
445 Activation(*cellGateDecoder, *cellGateEncoder,
446 TensorInfo({numUnits, numBatches}, internalType),
449 cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());

// ---- Cell state update ----
// Sub1Vector turns the forget gate into (1 - forgetGate) in place — used as
// the input-gate coupling under CIFG; whether a CIFG guard wraps it is on a
// missing line.
455 Sub1Vector(*forgetGateDecoder, stateTensorSize, *forgetGateEncoder);
// Argument tails of elementwise product(-accumulate) calls combining the
// gates into cellStateOut: cellGate*forgetGate and cellGate*inputGate.
457 *cellGateDecoder, *forgetGateDecoder, stateTensorSize, *cellStateOutEncoder);
462 *cellGateDecoder, *inputGateDecoder, stateTensorSize, *cellStateOutEncoder);

// Peephole contribution of the NEW cell state into the output gate
// (argument tail — note it reads cellStateOutDecoder, not cellStateIn).
475 numUnits, *cellStateOutDecoder, numBatches, *outputGateEncoder);

// ---- Output gate finalization (same layer-norm pattern) ----
478 if (layerNormEnabled)
480 outputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
481 m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
483 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
487 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
490 numBatches, *outputGateEncoder);
492 outputGateInfo.SetQuantizationScale(1.f / 4096);
493 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
// Bias add, visible in full here: outputGateData += outputGateBias per batch.
495 VectorBatchVectorAdd(*outputGateBiasDecoder, numUnits, *outputGateDecoder, numBatches, *outputGateEncoder);
497 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
500 outputGateInfo.SetQuantizationScale(cellStateOutInfo.GetQuantizationScale());
501 outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
504 Activation(*outputGateDecoder, *outputGateEncoder,
505 TensorInfo({numUnits, numBatches}, internalType),
508 outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());

// Activation of the new cell state, written into the cellGate scratch buffer
// (activation function argument on a missing line; presumably tanh).
511 Activation(*cellStateOutDecoder, *cellGateEncoder,
512 TensorInfo({numUnits, numBatches}, internalType),

// ---- Projection / output ----
// Projection path: optional projection bias, then an argument tail of a
// matrix-vector accumulate into the int16 staging buffer, copied to the
// final output. (The enclosing projectionEnabled guard and clipping logic
// are on missing lines.)
521 if (m_ProjectionBiasTensor)
527 numBatches, *outputInt16Encoder);
529 CopyVector(*outputInt16Decoder, numBatches * outputSize, *outputEncoder);
// Non-projection path: the hidden state is copied directly to the output.
539 CopyVector(*hiddenStateDecoder, numBatches * outputSize, *outputEncoder);
// Finally, the output also becomes the next output state.
543 CopyVector(*outputDecoder, numBatches * outputSize, *outputStateOutEncoder);
void MeanStddevNormalization(armnn::Decoder< float > &input_vector, armnn::Encoder< float > &output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon)
void VectorBatchVectorAdd(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
const TensorShape & GetShape() const
void Execute() const override
void ClipVector(armnn::Decoder< float > &vector, uint32_t vSize, float absLimit, armnn::Encoder< float > &outResult)
bool m_PeepholeEnabled
Enable/disable peephole.
void Sub1Vector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &result)
float m_HiddenStateScale
Hidden State quantization scale.
float m_OutputIntermediateScale
Output intermediate quantization scale.
void CopyVector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &outResult)
std::unique_ptr< armnn::ScopedTensorHandle > AssignScopedTensorHandle(const armnn::ConstTensorHandle *ptr)
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
void ZeroVector(armnn::Encoder< float > &vector, uint32_t vSize)
Copyright (c) 2021 ARM Limited and Contributors.
void VectorVectorCwiseProduct(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
LayerDescriptor m_Parameters
void VectorBatchVectorCwiseProduct(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
void ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) override
std::vector< ITensorHandle * > m_Inputs
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder< float > &matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder< float > &vector, uint32_t nBatch, armnn::Encoder< float > &outResult)
bool m_LayerNormEnabled
Enable/disable layer normalization.
RefQLstmWorkload(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info)
QLstmQueueDescriptor m_Data
float m_ProjectionClip
Clipping threshold value for the projection.
float m_InputIntermediateScale
Input intermediate quantization scale.
void VectorVectorCwiseProductAccumulate(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
void VectorBatchVectorAssign(armnn::Decoder< float > &vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder< float > &outBatchVector)
float m_ForgetIntermediateScale
Forget intermediate quantization scale.
float m_CellClip
Clipping threshold value for the cell state.
std::vector< ITensorHandle * > m_Outputs
bool m_ProjectionEnabled
Enable/disable the projection layer.
std::vector< ITensorHandle * > m_Outputs
Contains information about TensorInfos of a layer.
std::vector< ITensorHandle * > m_Inputs
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
float m_CellIntermediateScale
Cell intermediate quantization scale.
bool m_CifgEnabled
Enable/disable CIFG (coupled input & forget gate).
int32_t m_HiddenStateZeroPoint
Hidden State zero point.