diff options
Diffstat (limited to 'src/backends/reference')
-rw-r--r-- | src/backends/reference/test/RefLayerTests.cpp      |  2 +
-rw-r--r-- | src/backends/reference/workloads/RefQLstmWorkload.cpp | 20 ++++++++++++++++----
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index eb2d0c55d9..09096b4646 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1283,6 +1283,8 @@ ARMNN_AUTO_TEST_CASE(LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16Constant,
 
 // QLstm
 ARMNN_AUTO_TEST_CASE(QLstm, QLstmTest)
+ARMNN_AUTO_TEST_CASE(QLstm1, QLstmTest1)
+ARMNN_AUTO_TEST_CASE(QLstm2, QLstmTest2)
 
 // Convert from BFloat16 to Float32
 ARMNN_AUTO_TEST_CASE(ConvertBf16ToFp32, ConvertBf16ToFp32Test)
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp
index 34d048b0cb..e11ea55add 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp
@@ -146,6 +146,7 @@ void RefQLstmWorkload::Execute() const
     std::vector<int16_t> forgetGateData(stateTensorSize);
     std::vector<int16_t> outputGateData(stateTensorSize);
     std::vector<int32_t> hiddenStateData(stateTensorSize);
+    std::vector<int16_t> outputInt16Data(numBatches * outputSize);
 
     armnn::TensorInfo inputGateInfo(
             {numBatches , numUnits}, armnn::DataType::QSymmS16, m_Data.m_Parameters.m_InputIntermediateScale, 0);
@@ -159,6 +160,10 @@ void RefQLstmWorkload::Execute() const
                                       armnn::DataType::QAsymmS8,
                                       m_Data.m_Parameters.m_HiddenStateScale,
                                       m_Data.m_Parameters.m_HiddenStateZeroPoint);
+    armnn::TensorInfo outputInt16Info({numBatches , outputSize},
+                                      armnn::DataType::QSymmS16,
+                                      outputInfo.GetQuantizationScale(),
+                                      outputInfo.GetQuantizationOffset());
 
     // Decoders/Encoders for internal states
     std::unique_ptr<Decoder<float>> inputGateDecoder =
@@ -183,6 +188,12 @@ void RefQLstmWorkload::Execute() const
     std::unique_ptr<Encoder<float>> hiddenStateEncoder =
             MakeEncoder<float>(hiddenStateInfo, hiddenStateData.data());
 
+    // Int16 used to accumulate output to prevent overflowing (after Projection MatMul)
+    std::unique_ptr<Decoder<float>> outputInt16Decoder =
+            MakeDecoder<float>(outputInt16Info, outputInt16Data.data());
+    std::unique_ptr<Encoder<float>> outputInt16Encoder =
+            MakeEncoder<float>(outputInt16Info, outputInt16Data.data());
+
     // Create decoders for optional params if they are enabled
     if (!cifgEnabled)
     {
@@ -494,12 +505,13 @@ void RefQLstmWorkload::Execute() const
     {
         if (m_ProjectionBiasTensor)
         {
-            VectorBatchVectorAssign(*projectionBiasDecoder,
-                                    outputSize, numBatches, *outputEncoder);
+            VectorBatchVectorAssign(*projectionBiasDecoder, outputSize, numBatches, *outputInt16Encoder);
         }
 
-        MatrixBatchVectorMultiplyAccumulate(*projectionWeightsDecoder,
-                                            outputSize, numUnits, *hiddenStateDecoder, numBatches, *outputEncoder);
+        MatrixBatchVectorMultiplyAccumulate(*projectionWeightsDecoder, outputSize, numUnits, *hiddenStateDecoder,
+                                            numBatches, *outputInt16Encoder);
+
+        CopyVector(*outputInt16Decoder, numBatches * outputSize, *outputEncoder);
 
         if (m_Data.m_Parameters.m_ProjectionClip > 0.0)
         {