author     James Conroy <james.conroy@arm.com>  2020-06-08 14:53:10 +0100
committer  James Conroy <james.conroy@arm.com>  2020-06-08 15:15:42 +0100
commit     b22a75e2aaec1175bbacba54e1a33a83f9749ce2 (patch)
tree       e25c0ba8652435a14825eb29a7847a421efadb84 /src
parent     041b3c0d416d94b1aa4efa41960808b7e5e61f25 (diff)
download   armnn-b22a75e2aaec1175bbacba54e1a33a83f9749ce2.tar.gz
IVGCVSW-4860 Add tests to verify QLstm projection
* Adds int16 output tensor to CpuRef impl to prevent overflow
  when accumulating output after projection.
* Adds two remaining tests to verify QLstm on CpuRef.

Signed-off-by: James Conroy <james.conroy@arm.com>
Change-Id: I93d7c64c4a9cc1012cb2bc052d598d4279fbd372
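For context on the first bullet: when the projection matmul is accumulated directly into the narrow 8-bit output, the running sum can leave the int8 range and wrap long before the final value is known; staging the accumulation in a wider 16-bit buffer holds the exact sum and clamps to int8 only once, at the end. A minimal standalone sketch of that difference (illustrative values and names, not Arm NN code):

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative only: accumulate a dot product of int8 weights and an int8
// hidden state. Accumulating into int8 wraps once the running sum leaves
// [-128, 127]; a wider int16 accumulator holds the full sum, which can then
// be clamped to int8 once, after the projection completes.
int main()
{
    std::vector<int8_t> weights = {100, 100, 100, 100};
    std::vector<int8_t> hidden  = {1, 1, 1, 1};

    int8_t  narrowAcc = 0; // mirrors accumulating straight into the int8 output
    int16_t wideAcc   = 0; // mirrors the int16 staging buffer added by this patch

    for (size_t i = 0; i < weights.size(); ++i)
    {
        narrowAcc = static_cast<int8_t>(narrowAcc + weights[i] * hidden[i]);  // wraps
        wideAcc   = static_cast<int16_t>(wideAcc + weights[i] * hidden[i]);   // exact
    }

    // Clamp the wide sum to the int8 output range only at the end.
    int16_t clamped = wideAcc > 127 ? 127 : (wideAcc < -128 ? -128 : wideAcc);

    std::printf("int8 accumulator: %d (wrapped)\n", narrowAcc);                 // -112
    std::printf("int16 accumulator: %d -> clamped %d\n", wideAcc, clamped);     // 400 -> 127
    return 0;
}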
Diffstat (limited to 'src')
-rw-r--r--  src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp | 554
-rw-r--r--  src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp |   8
-rw-r--r--  src/backends/reference/test/RefLayerTests.cpp                |   2
-rw-r--r--  src/backends/reference/workloads/RefQLstmWorkload.cpp        |  20
4 files changed, 579 insertions, 5 deletions
diff --git a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp
index c7f902ab04..08ee440b8e 100644
--- a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp
@@ -1733,7 +1733,7 @@ LayerTestResult<uint8_t, 2> QuantizedLstmTestImpl(
return ret;
}
-// QLSTM
+// QLSTM: CIFG, LayerNorm
LayerTestResult<int8_t, 2> QLstmTestImpl(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
@@ -1969,6 +1969,528 @@ LayerTestResult<int8_t, 2> QLstmTestImpl(
return ret;
}
+// QLSTM: Projection, LayerNorm
+LayerTestResult<int8_t, 2> QLstmTestImpl1(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const boost::multi_array<int8_t, 2>& input,
+ const boost::multi_array<int8_t, 2>& outputExpected)
+{
+ IgnoreUnused(memoryManager);
+ unsigned int numBatches = 2;
+ unsigned int inputSize = 5;
+ unsigned int outputSize = 3;
+ unsigned int numUnits = 4;
+
+ bool cifgEnabled = false;
+ bool peepholeEnabled = false;
+ bool projectionEnabled = true;
+ bool layerNormEnabled = true;
+
+ // Scale/Offset quantization info
+ float inputScale = 0.0078125f;
+ int32_t inputOffset = 0;
+
+ int32_t hiddenStateZeroPoint = 0;
+ float hiddenStateScale = 0.007f;
+
+ // if (!projectionEnabled) outputScale == hiddenStateScale
+ float outputScale = 3.05176e-05f;
+ int32_t outputOffset = 0;
+
+ float cellStateScale = 3.05176e-05f;
+ int32_t cellStateOffset = 0;
+
+ float weightsScale = 0.00784314f;
+ int32_t weightsOffset = 0;
+
+ float layerNormScale = 3.05182e-05f;
+ int32_t layerNormOffset = 0;
+
+ float biasScale = layerNormScale / 1024;
+ int32_t biasOffset = 0;
+
+ float projectionWeightsScale = 0.00392157f;
+
+ float inputIntermediateScale = 0.007059f;
+ float forgetIntermediateScale = 0.007812f;
+ float cellIntermediateScale = inputIntermediateScale;
+ float outputIntermediateScale = forgetIntermediateScale;
+
+ float cellClip = 0.0f;
+ float projectionClip = 0.0f;
+
+ // Input/Output tensor info
+ armnn::TensorInfo inputInfo({numBatches , inputSize},
+ armnn::DataType::QAsymmS8,
+ inputScale,
+ inputOffset);
+
+ armnn::TensorInfo cellStateInfo({numBatches , numUnits},
+ armnn::DataType::QSymmS16,
+ cellStateScale,
+ cellStateOffset);
+
+ armnn::TensorInfo outputStateInfo({numBatches , outputSize},
+ armnn::DataType::QAsymmS8,
+ outputScale,
+ outputOffset);
+
+ LayerTestResult<int8_t, 2> ret(outputStateInfo);
+
+ // Input tensors
+ std::vector<int8_t> inputVector;
+ inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
+ auto inputTensor = MakeTensor<int8_t, 2>(inputInfo, inputVector);
+
+ std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
+ auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);
+
+ std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0};
+ auto outputStateInTensor = MakeTensor<int8_t, 2>(outputStateInfo, outputStateInVector);
+
+ // Output tensors
+ std::vector<int16_t> cellStateOutVector = {-14650, 8939, 5771, 6715, -11843, 7847, 1508, 12939};
+ auto cellStateOutTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);
+
+ std::vector<int8_t> outputVector;
+ outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
+ ret.outputExpected = MakeTensor<int8_t, 2>(outputStateInfo, outputVector);
+
+ // Create tensor handles
+ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
+ std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
+ workloadFactory.CreateTensorHandle(cellStateInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
+ workloadFactory.CreateTensorHandle(outputStateInfo);
+
+ std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+ std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
+ workloadFactory.CreateTensorHandle(cellStateInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+
+ armnn::QLstmQueueDescriptor data;
+ armnn::WorkloadInfo info;
+
+ // Add inputs and outputs to workload
+ AddInputToWorkload(data, info, inputInfo, inputHandle.get());
+ AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
+ AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());
+
+ AddOutputToWorkload(data, info, outputStateInfo, outputStateOutHandle.get());
+ AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
+ AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());
+
+ // Weights and bias tensor and quantization info
+ armnn::TensorInfo inputWeightsInfo({numUnits, inputSize},
+ armnn::DataType::QSymmS8,
+ weightsScale,
+ weightsOffset);
+
+ armnn::TensorInfo recurrentWeightsInfo({numUnits, outputSize},
+ armnn::DataType::QSymmS8,
+ weightsScale,
+ weightsOffset);
+
+ armnn::TensorInfo biasInfo({numUnits}, armnn::DataType::Signed32, biasScale, biasOffset);
+
+ armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);
+
+ armnn::TensorInfo projectionWeightsInfo({outputSize, numUnits},
+ armnn::DataType::QSymmS8,
+ projectionWeightsScale,
+ 0);
+
+ // Weights and bias tensor data
+ auto inputToInputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+ {64, 77, 89, -102, -115, 13, 25, 38, -51, 64, -102, 89, -77, 64, -51, -64, -51, -38, -25, -13});
+ auto inputToForgetWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+ {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64});
+ auto inputToCellWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+ {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77});
+ auto inputToOutputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+ {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51});
+
+ auto recurrentToInputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+ {-25, -38, 51, 13, -64, 115, -25, -38, -89, 6, -25, -77});
+ auto recurrentToForgetWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+ {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25});
+ auto recurrentToCellWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+ {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25});
+ auto recurrentToOutputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+ {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25});
+
+ auto inputGateBias = MakeTensor<int32_t, 1>(biasInfo, {644245, 3221226, 4724464, 8160438});
+ auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {2147484, -6442451, -4294968, 2147484});
+ auto cellBias = MakeTensor<int32_t, 1>(biasInfo, {-1073742, 15461883, 5368709, 1717987});
+ auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {1073742, -214748, 4294968, 2147484});
+
+ auto inputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {3277, 6553, 9830, 16384});
+ auto forgetLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {6553, 6553, 13107, 9830});
+ auto cellLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {22937, 6553, 9830, 26214});
+ auto outputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {19660, 6553, 6553, 16384});
+
+ auto projectionWeights = MakeTensor<int8_t, 2>(projectionWeightsInfo,
+ {-25, 51, 3, -51, 25, 127, 77, 20, 18, 51, -102, 51});
+
+ // ScopedCpuTensorHandles
+ armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(inputWeightsInfo);
+ armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
+ armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
+ armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);
+
+ armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(recurrentWeightsInfo);
+ armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
+ armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
+ armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);
+
+ armnn::ScopedCpuTensorHandle inputGateBiasTensor(biasInfo);
+ armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
+ armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
+ armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);
+
+ armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(layerNormWeightsInfo);
+ armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(layerNormWeightsInfo);
+ armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(layerNormWeightsInfo);
+ armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(layerNormWeightsInfo);
+
+ armnn::ScopedCpuTensorHandle projectionWeightsTensor(projectionWeightsInfo);
+
+ // Allocate and copy data
+ AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
+
+ AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
+
+ AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
+ AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
+ AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
+ AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
+
+ AllocateAndCopyDataToITensorHandle(&inputLayerNormWeightsTensor, &inputLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);
+
+ AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);
+
+ // Setup queue descriptor
+ data.m_InputToInputWeights = &inputToInputWeightsTensor;
+ data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+ data.m_InputToCellWeights = &inputToCellWeightsTensor;
+ data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+
+ data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
+ data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+ data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+ data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+
+ data.m_InputGateBias = &inputGateBiasTensor;
+ data.m_ForgetGateBias = &forgetGateBiasTensor;
+ data.m_CellBias = &cellBiasTensor;
+ data.m_OutputGateBias = &outputGateBiasTensor;
+
+ data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor;
+ data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
+ data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
+ data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;
+
+ data.m_ProjectionWeights = &projectionWeightsTensor;
+
+ data.m_Parameters.m_CifgEnabled = cifgEnabled;
+ data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
+ data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
+ data.m_Parameters.m_LayerNormEnabled = layerNormEnabled;
+
+ data.m_Parameters.m_InputIntermediateScale = inputIntermediateScale;
+ data.m_Parameters.m_ForgetIntermediateScale = forgetIntermediateScale;
+ data.m_Parameters.m_CellIntermediateScale = cellIntermediateScale;
+ data.m_Parameters.m_OutputIntermediateScale = outputIntermediateScale;
+
+ data.m_Parameters.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
+ data.m_Parameters.m_HiddenStateScale = hiddenStateScale;
+
+ data.m_Parameters.m_CellClip = cellClip;
+ data.m_Parameters.m_ProjectionClip = projectionClip;
+
+ // Create workload and allocate tensor handles
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQLstm(data, info);
+ inputHandle->Allocate();
+ outputStateInHandle->Allocate();
+ cellStateInHandle->Allocate();
+
+ outputStateOutHandle->Allocate();
+ cellStateOutHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+ CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
+ CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
+
+ workload->Execute();
+
+ CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+
+ return ret;
+}
+
+// QLSTM: Projection, CIFG, LayerNorm
+LayerTestResult<int8_t, 2> QLstmTestImpl2(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const boost::multi_array<int8_t, 2>& input,
+ const boost::multi_array<int8_t, 2>& outputExpected)
+{
+ IgnoreUnused(memoryManager);
+ unsigned int numBatches = 2;
+ unsigned int inputSize = 5;
+ unsigned int outputSize = 3;
+ unsigned int numUnits = 4;
+
+ bool cifgEnabled = true;
+ bool peepholeEnabled = false;
+ bool projectionEnabled = true;
+ bool layerNormEnabled = true;
+
+ // Scale/Offset quantization info
+ float inputScale = 0.0078125f;
+ int32_t inputOffset = 0;
+
+ int32_t hiddenStateZeroPoint = 0;
+ float hiddenStateScale = 0.007f;
+
+ // if (!projectionEnabled) outputScale == hiddenStateScale
+ float outputScale = 3.05176e-05f;
+ int32_t outputOffset = 0;
+
+ float cellStateScale = 3.05176e-05f;
+ int32_t cellStateOffset = 0;
+
+ float weightsScale = 0.00784314f;
+ int32_t weightsOffset = 0;
+
+ float layerNormScale = 3.05182e-05f;
+ int32_t layerNormOffset = 0;
+
+ float biasScale = layerNormScale / 1024;
+ int32_t biasOffset = 0;
+
+ float projectionWeightsScale = 0.00392157f;
+
+ float inputIntermediateScale = 0.007059f;
+ float forgetIntermediateScale = 0.007812f;
+ float cellIntermediateScale = inputIntermediateScale;
+ float outputIntermediateScale = forgetIntermediateScale;
+
+ float cellClip = 0.0f;
+ float projectionClip = 0.0f;
+
+ // Input/Output tensor info
+ armnn::TensorInfo inputInfo({numBatches , inputSize},
+ armnn::DataType::QAsymmS8,
+ inputScale,
+ inputOffset);
+
+ armnn::TensorInfo cellStateInfo({numBatches , numUnits},
+ armnn::DataType::QSymmS16,
+ cellStateScale,
+ cellStateOffset);
+
+ armnn::TensorInfo outputStateInfo({numBatches , outputSize},
+ armnn::DataType::QAsymmS8,
+ outputScale,
+ outputOffset);
+
+ LayerTestResult<int8_t, 2> ret(outputStateInfo);
+
+ // Input tensors
+ std::vector<int8_t> inputVector;
+ inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
+ auto inputTensor = MakeTensor<int8_t, 2>(inputInfo, inputVector);
+
+ std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
+ auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);
+
+ std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0};
+ auto outputStateInTensor = MakeTensor<int8_t, 2>(outputStateInfo, outputStateInVector);
+
+ // Output tensors
+ std::vector<int16_t> cellStateOutVector = {-14650, 8939, 5771, 6715, -11843, 7847, 1508, 12939};
+ auto cellStateOutTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);
+
+ std::vector<int8_t> outputVector;
+ outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
+ ret.outputExpected = MakeTensor<int8_t, 2>(outputStateInfo, outputVector);
+
+ // Create tensor handles
+ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
+ std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
+ workloadFactory.CreateTensorHandle(cellStateInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
+ workloadFactory.CreateTensorHandle(outputStateInfo);
+
+ std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+ std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
+ workloadFactory.CreateTensorHandle(cellStateInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+
+ armnn::QLstmQueueDescriptor data;
+ armnn::WorkloadInfo info;
+
+ // Add inputs and outputs to workload
+ AddInputToWorkload(data, info, inputInfo, inputHandle.get());
+ AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
+ AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());
+
+ AddOutputToWorkload(data, info, outputStateInfo, outputStateOutHandle.get());
+ AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
+ AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());
+
+ // Weights and bias tensor and quantization info
+ armnn::TensorInfo inputWeightsInfo({numUnits, inputSize},
+ armnn::DataType::QSymmS8,
+ weightsScale,
+ weightsOffset);
+
+ armnn::TensorInfo recurrentWeightsInfo({numUnits, outputSize},
+ armnn::DataType::QSymmS8,
+ weightsScale,
+ weightsOffset);
+
+ armnn::TensorInfo biasInfo({numUnits}, armnn::DataType::Signed32, biasScale, biasOffset);
+
+ armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);
+
+ armnn::TensorInfo projectionWeightsInfo({outputSize, numUnits},
+ armnn::DataType::QSymmS8,
+ projectionWeightsScale,
+ 0);
+
+ // Weights and bias tensor data
+ auto inputToForgetWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+ {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64});
+ auto inputToCellWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+ {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77});
+ auto inputToOutputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+ {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51});
+
+ auto recurrentToForgetWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+ {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25});
+ auto recurrentToCellWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+ {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25});
+ auto recurrentToOutputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+ {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25});
+
+ auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {2147484, -6442451, -4294968, 2147484});
+ auto cellBias = MakeTensor<int32_t, 1>(biasInfo, {-1073742, 15461883, 5368709, 1717987});
+ auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {1073742, -214748, 4294968, 2147484});
+
+ auto forgetLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {6553, 6553, 13107, 9830});
+ auto cellLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {22937, 6553, 9830, 26214});
+ auto outputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {19660, 6553, 6553, 16384});
+
+ auto projectionWeights = MakeTensor<int8_t, 2>(projectionWeightsInfo,
+ {-25, 51, 3, -51, 25, 127, 77, 20, 18, 51, -102, 51});
+
+ // ScopedCpuTensorHandles
+ armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
+ armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
+ armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);
+
+ armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
+ armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
+ armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);
+
+ armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
+ armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
+ armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);
+
+ armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(layerNormWeightsInfo);
+ armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(layerNormWeightsInfo);
+ armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(layerNormWeightsInfo);
+
+ armnn::ScopedCpuTensorHandle projectionWeightsTensor(projectionWeightsInfo);
+
+ // Allocate and copy data
+ AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
+
+ AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
+
+ AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
+ AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
+ AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
+
+ AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);
+
+ AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);
+
+ // Setup queue descriptor
+ data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+ data.m_InputToCellWeights = &inputToCellWeightsTensor;
+ data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+
+ data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+ data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+ data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+
+ data.m_ForgetGateBias = &forgetGateBiasTensor;
+ data.m_CellBias = &cellBiasTensor;
+ data.m_OutputGateBias = &outputGateBiasTensor;
+
+ data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
+ data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
+ data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;
+
+ data.m_ProjectionWeights = &projectionWeightsTensor;
+
+ data.m_Parameters.m_CifgEnabled = cifgEnabled;
+ data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
+ data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
+ data.m_Parameters.m_LayerNormEnabled = layerNormEnabled;
+
+ data.m_Parameters.m_InputIntermediateScale = inputIntermediateScale;
+ data.m_Parameters.m_ForgetIntermediateScale = forgetIntermediateScale;
+ data.m_Parameters.m_CellIntermediateScale = cellIntermediateScale;
+ data.m_Parameters.m_OutputIntermediateScale = outputIntermediateScale;
+
+ data.m_Parameters.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
+ data.m_Parameters.m_HiddenStateScale = hiddenStateScale;
+
+ data.m_Parameters.m_CellClip = cellClip;
+ data.m_Parameters.m_ProjectionClip = projectionClip;
+
+ // Create workload and allocate tensor handles
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQLstm(data, info);
+ inputHandle->Allocate();
+ outputStateInHandle->Allocate();
+ cellStateInHandle->Allocate();
+
+ outputStateOutHandle->Allocate();
+ cellStateOutHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+ CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
+ CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
+
+ workload->Execute();
+
+ CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+
+ return ret;
+}
+
} // anonymous namespace
@@ -2360,3 +2882,33 @@ LayerTestResult<int8_t, 2> QLstmTest(
return QLstmTestImpl(workloadFactory, memoryManager, input, expectedOutput);
}
+
+LayerTestResult<int8_t, 2> QLstmTest1(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputDesc({2, 5}, armnn::DataType::QAsymmS8);
+ boost::multi_array<int8_t, 2> input = MakeTensor<int8_t, 2>(inputDesc, std::vector<int8_t>(
+ {90, 102, 13, 26, 38, 102, 13, 26, 51, 64}));
+
+ armnn::TensorInfo outputDesc({2, 3}, armnn::DataType::QAsymmS8);
+ boost::multi_array<int8_t, 2> expectedOutput = MakeTensor<int8_t, 2>(outputDesc, std::vector<int8_t>(
+ {127, 127, -108, -67, 127, 127}));
+
+ return QLstmTestImpl1(workloadFactory, memoryManager, input, expectedOutput);
+}
+
+LayerTestResult<int8_t, 2> QLstmTest2(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputDesc({2, 5}, armnn::DataType::QAsymmS8);
+ boost::multi_array<int8_t, 2> input = MakeTensor<int8_t, 2>(inputDesc, std::vector<int8_t>(
+ {90, 102, 13, 26, 38, 102, 13, 26, 51, 64}));
+
+ armnn::TensorInfo outputDesc({2, 3}, armnn::DataType::QAsymmS8);
+ boost::multi_array<int8_t, 2> expectedOutput = MakeTensor<int8_t, 2>(outputDesc, std::vector<int8_t>(
+ {127, 127, 127, -128, 127, 127}));
+
+ return QLstmTestImpl2(workloadFactory, memoryManager, input, expectedOutput);
+}
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp
index f1180aee16..6e293456af 100644
--- a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp
@@ -66,3 +66,11 @@ LayerTestResult<uint8_t, 2> QuantizedLstmTest(
LayerTestResult<int8_t, 2> QLstmTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int8_t, 2> QLstmTest1(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int8_t, 2> QLstmTest2(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
\ No newline at end of file
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index eb2d0c55d9..09096b4646 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1283,6 +1283,8 @@ ARMNN_AUTO_TEST_CASE(LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16Constant,
// QLstm
ARMNN_AUTO_TEST_CASE(QLstm, QLstmTest)
+ARMNN_AUTO_TEST_CASE(QLstm1, QLstmTest1)
+ARMNN_AUTO_TEST_CASE(QLstm2, QLstmTest2)
// Convert from BFloat16 to Float32
ARMNN_AUTO_TEST_CASE(ConvertBf16ToFp32, ConvertBf16ToFp32Test)
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp
index 34d048b0cb..e11ea55add 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp
@@ -146,6 +146,7 @@ void RefQLstmWorkload::Execute() const
std::vector<int16_t> forgetGateData(stateTensorSize);
std::vector<int16_t> outputGateData(stateTensorSize);
std::vector<int32_t> hiddenStateData(stateTensorSize);
+ std::vector<int16_t> outputInt16Data(numBatches * outputSize);
armnn::TensorInfo inputGateInfo(
{numBatches , numUnits}, armnn::DataType::QSymmS16, m_Data.m_Parameters.m_InputIntermediateScale, 0);
@@ -159,6 +160,10 @@ void RefQLstmWorkload::Execute() const
armnn::DataType::QAsymmS8,
m_Data.m_Parameters.m_HiddenStateScale,
m_Data.m_Parameters.m_HiddenStateZeroPoint);
+ armnn::TensorInfo outputInt16Info({numBatches , outputSize},
+ armnn::DataType::QSymmS16,
+ outputInfo.GetQuantizationScale(),
+ outputInfo.GetQuantizationOffset());
// Decoders/Encoders for internal states
std::unique_ptr<Decoder<float>> inputGateDecoder =
@@ -183,6 +188,12 @@ void RefQLstmWorkload::Execute() const
std::unique_ptr<Encoder<float>> hiddenStateEncoder =
MakeEncoder<float>(hiddenStateInfo, hiddenStateData.data());
+ // Int16 used to accumulate output to prevent overflowing (after Projection MatMul)
+ std::unique_ptr<Decoder<float>> outputInt16Decoder =
+ MakeDecoder<float>(outputInt16Info, outputInt16Data.data());
+ std::unique_ptr<Encoder<float>> outputInt16Encoder =
+ MakeEncoder<float>(outputInt16Info, outputInt16Data.data());
+
// Create decoders for optional params if they are enabled
if (!cifgEnabled)
{
@@ -494,12 +505,13 @@ void RefQLstmWorkload::Execute() const
{
if (m_ProjectionBiasTensor)
{
- VectorBatchVectorAssign(*projectionBiasDecoder,
- outputSize, numBatches, *outputEncoder);
+ VectorBatchVectorAssign(*projectionBiasDecoder, outputSize, numBatches, *outputInt16Encoder);
}
- MatrixBatchVectorMultiplyAccumulate(*projectionWeightsDecoder,
- outputSize, numUnits, *hiddenStateDecoder, numBatches, *outputEncoder);
+ MatrixBatchVectorMultiplyAccumulate(*projectionWeightsDecoder, outputSize, numUnits, *hiddenStateDecoder,
+ numBatches, *outputInt16Encoder);
+
+ CopyVector(*outputInt16Decoder, numBatches * outputSize, *outputEncoder);
if (m_Data.m_Parameters.m_ProjectionClip > 0.0)
{
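Taken together, the reworked projection path above now runs in three steps: VectorBatchVectorAssign seeds the int16 staging buffer with the projection bias, MatrixBatchVectorMultiplyAccumulate accumulates the projection matmul into that same buffer, and a single CopyVector requantizes the staged result into the int8 output. A minimal sketch of that ordering with plain integer loops, standing in for Arm NN's float decoder/encoder machinery (the real workload dequantizes through the tensors' scales; all names below are illustrative):

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-in for the patched projection path: bias assign,
// matmul-accumulate into an int16 staging buffer, then one clamped copy
// into the int8 output. Shapes follow the tests: weights are
// [outputSize x numUnits], hidden state is [numBatches x numUnits].
void ProjectOutput(const std::vector<int8_t>& weights,
                   const std::vector<int8_t>& hiddenState,
                   const std::vector<int32_t>& bias,     // [outputSize], may be empty
                   std::vector<int8_t>& output,          // [numBatches * outputSize]
                   unsigned numBatches, unsigned numUnits, unsigned outputSize)
{
    std::vector<int16_t> staging(numBatches * outputSize, 0);

    for (unsigned b = 0; b < numBatches; ++b)
    {
        for (unsigned o = 0; o < outputSize; ++o)
        {
            int32_t acc = bias.empty() ? 0 : bias[o];  // VectorBatchVectorAssign step
            for (unsigned u = 0; u < numUnits; ++u)    // MatrixBatchVectorMultiplyAccumulate step
            {
                acc += weights[o * numUnits + u] * hiddenState[b * numUnits + u];
            }
            staging[b * outputSize + o] =
                static_cast<int16_t>(std::clamp(acc, -32768, 32767));
        }
    }

    for (size_t i = 0; i < output.size(); ++i)  // CopyVector step: requantize to int8
    {
        output[i] = static_cast<int8_t>(std::clamp<int16_t>(staging[i], -128, 127));
    }
}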