diff options
author | Jan Eilers <jan.eilers@arm.com> | 2019-06-26 13:10:09 +0100 |
---|---|---|
committer | Jan Eilers <jan.eilers@arm.com> | 2019-07-02 09:59:37 +0000 |
commit | 38e05bd2836b1b65b440330a9c283038ba4192c3 (patch) | |
tree | c232f71ce6a101c70ed65e046678f7b22593dbe4 /src/backends/backendsCommon | |
parent | d0c0cc3e27f1ada9df167d3b9ff248be432d16e1 (diff) | |
download | armnn-38e05bd2836b1b65b440330a9c283038ba4192c3.tar.gz |
IVGCVSW-3236 Extend Ref LSTM with layer normalization support
* Add descriptor values
* Update lstm queue descriptor validate function
* Update lstm workload
* Update IsLstmSupported (Cl and Ref), LayerSupportBase, ILayerSupport
* Update lstm layer
* Add unit tests
Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: I932175d550facfb342325051eaa7bd2084ebdc18
Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Diffstat (limited to 'src/backends/backendsCommon')
-rw-r--r-- | src/backends/backendsCommon/LayerSupportBase.cpp | 6 | ||||
-rw-r--r-- | src/backends/backendsCommon/LayerSupportBase.hpp | 6 | ||||
-rw-r--r-- | src/backends/backendsCommon/WorkloadData.cpp | 271 | ||||
-rw-r--r-- | src/backends/backendsCommon/WorkloadData.hpp | 8 | ||||
-rw-r--r-- | src/backends/backendsCommon/WorkloadFactory.cpp | 33 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/LayerTests.cpp | 166 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/LayerTests.hpp | 11 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/LstmTestImpl.hpp | 386 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/WorkloadDataValidation.cpp | 149 |
9 files changed, 1010 insertions, 26 deletions
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp index 6c25f87c9a..4488e25c9c 100644 --- a/src/backends/backendsCommon/LayerSupportBase.cpp +++ b/src/backends/backendsCommon/LayerSupportBase.cpp @@ -243,7 +243,11 @@ bool LayerSupportBase::IsLstmSupported(const TensorInfo& input, const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, const TensorInfo* cellToOutputWeights, - Optional<std::string&> reasonIfUnsupported) const + Optional<std::string&> reasonIfUnsupported, + const TensorInfo* inputLayerNormWeights, + const TensorInfo* forgetLayerNormWeights, + const TensorInfo* cellLayerNormWeights, + const TensorInfo* outputLayerNormWeights) const { return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); } diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp index 7f63ccfbb1..8abd975922 100644 --- a/src/backends/backendsCommon/LayerSupportBase.hpp +++ b/src/backends/backendsCommon/LayerSupportBase.hpp @@ -157,7 +157,11 @@ public: const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, const TensorInfo* cellToOutputWeights, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + Optional<std::string&> reasonIfUnsupported = EmptyOptional(), + const TensorInfo* inputLayerNormWeights = nullptr, + const TensorInfo* forgetLayerNormWeights = nullptr, + const TensorInfo* cellLayerNormWeights = nullptr, + const TensorInfo* outputLayerNormWeights = nullptr) const override; bool IsMaximumSupported(const TensorInfo& input0, const TensorInfo& input1, diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index e7915dd40b..3766f5f7ca 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -114,6 +114,30 @@ void ValidateTensorNumDimensions(const TensorInfo& tensor, } 
//--------------------------------------------------------------- +void ValidateTensorNumElements(const TensorInfo& tensor, + std::string const& descName, + unsigned int numElements, + std::string const& tensorName) +{ + if (tensor.GetNumElements() != numElements) + { + throw InvalidArgumentException(descName + ": Expected " + to_string(numElements) + " but got " + + to_string(tensor.GetNumDimensions()) + " elements for " + + tensorName + " tensor."); + } +} + +//--------------------------------------------------------------- +void ValidateTensorNumDimNumElem(const TensorInfo& tensorInfo, + unsigned int numDimension, + unsigned int numElements, + std::string const& tensorName) +{ + ValidateTensorNumDimensions(tensorInfo, "ValidateTensorNumDimNumElem: NumDimensionCheck", numDimension, tensorName); + ValidateTensorNumElements(tensorInfo, "ValidateTensorNumDimNumElem: NumElementsCheck", numElements, tensorName); +} + +//--------------------------------------------------------------- void ValidateTensorDataType(const TensorInfo& tensor, DataType dataType, const std::string& descName, std::string const& tensorName) { @@ -1238,22 +1262,257 @@ void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const void LstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const { - ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "LstmQueueDescriptor", 2, "input"); - ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "LstmQueueDescriptor", 2, "output"); - std::vector<DataType> supportedTypes = { DataType::Float16, DataType::Float32, DataType::QuantisedSymm16 }; + // ported from android/ml/nn/common/operations/LSTM.cpp CheckInputTensorDimensions() + // check for supported type of one input and match them with all the other input and output ValidateDataTypes(workloadInfo.m_InputTensorInfos[0], supportedTypes, "LstmQueueDescriptor"); + // type matches all other inputs + for (uint32_t i = 1; i < 
workloadInfo.m_InputTensorInfos.size(); ++i) + { + ValidateTensorDataTypesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[i], + "LstmQueueDescriptor", + "InputTensor[0]", + "InputTensor[" + std::to_string(i) + "]"); + } + // type matches all other outputs + for (uint32_t i = 0; i < workloadInfo.m_OutputTensorInfos.size(); ++i) + { + ValidateTensorDataTypesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[i], + "LstmQueueDescriptor", + "InputTensor[0]", + "OutputTensor[" + std::to_string(i) + "]"); + } - ValidateDataTypes(workloadInfo.m_OutputTensorInfos[0], - supportedTypes, - "LstmQueueDescriptor"); + // TODO: check clipping parameter is valid + + // Inferring batch size, number of outputs and number of cells from the inputs. + // TODO: figure out if there is a way to make sure the specific inputs are at that index of workloadInfo + const uint32_t n_input = workloadInfo.m_InputTensorInfos[0].GetShape()[1]; + const uint32_t n_batch = workloadInfo.m_InputTensorInfos[0].GetShape()[0]; + ValidatePointer(m_InputToOutputWeights, "Null pointer check", "InputToOutputWeights"); + const uint32_t n_cell = m_InputToOutputWeights->GetShape()[0]; + ValidatePointer(m_RecurrentToOutputWeights, "Null pointer check", "RecurrentToOutputWeights"); + const uint32_t n_output = m_RecurrentToOutputWeights->GetShape()[1]; + + // check dimensions of all inputs and outputs + if (workloadInfo.m_InputTensorInfos.size() != 3) + { + throw InvalidArgumentException("Invalid number of inputs."); + } + if (workloadInfo.m_OutputTensorInfos.size() != 4) + { + throw InvalidArgumentException("Invalid number of outputs."); + } + // input tensor + ValidateTensorNumDimNumElem( workloadInfo.m_InputTensorInfos[0], 2, (n_batch * n_input), + "LstmQueueDescriptor input[0]"); + // outputStateInTensor + ValidateTensorNumDimNumElem( workloadInfo.m_InputTensorInfos[1], 2, (n_batch * n_output), + "LstmQueueDescriptor input[1]"); + // outputStateInTensor + 
ValidateTensorNumDimNumElem( workloadInfo.m_InputTensorInfos[2], 2, (n_batch * n_cell), + "LstmQueueDescriptor input[2]"); + // scratchBufferTensor + unsigned int scratchBufferSize = m_Parameters.m_CifgEnabled ? n_cell * 3 : n_cell * 4; + ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[0], 2, (n_batch * scratchBufferSize), + "LstmQueueDescriptor output[0]"); + // outputStateOutTensor + ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[1], 2, (n_batch * n_output), + "LstmQueueDescriptor output[1]"); + // cellStateOutTensor + ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[2], 2, (n_batch * n_cell), + "LstmQueueDescriptor output[2]"); + // outputTensor + ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[3], 2, (n_batch * n_output), + "LstmQueueDescriptor output[3]"); + + + // check that dimensions of inputs/outputs and QueueDescriptor data match with each other + if ( m_InputToInputWeights ) + { + ValidateTensorNumDimNumElem(m_InputToInputWeights->GetTensorInfo(), 2, + (n_cell * n_input), "InputLayerNormWeights"); + } + + ValidatePointer(m_InputToForgetWeights, "Null pointer check", "InputToForgetWeights"); + ValidateTensorNumDimNumElem(m_InputToForgetWeights->GetTensorInfo(), 2, + (n_cell * n_input), "InputToForgetWeights"); + + ValidatePointer(m_InputToCellWeights, "Null pointer check", "InputToCellWeights"); + ValidateTensorNumDimNumElem(m_InputToCellWeights->GetTensorInfo(), 2, + (n_cell * n_input), "InputToCellWeights"); + + if ( m_RecurrentToInputWeights ) + { + ValidateTensorNumDimNumElem(m_RecurrentToInputWeights->GetTensorInfo(), 2, + (n_cell * n_output), "RecurrentToInputWeights"); + } + + ValidatePointer(m_RecurrentToForgetWeights, "Null pointer check", "RecurrentToForgetWeights"); + ValidateTensorNumDimNumElem(m_RecurrentToForgetWeights->GetTensorInfo(), 2, + (n_cell * n_output), "RecurrentToForgetWeights"); + + ValidatePointer(m_RecurrentToCellWeights, "Null pointer check", "RecurrentToCellWeights"); + 
ValidateTensorNumDimNumElem(m_RecurrentToCellWeights->GetTensorInfo(), 2, + (n_cell * n_output), "RecurrentToCellWeights"); + + // Make sure the input-gate's parameters are either both present (regular + // LSTM) or not at all (CIFG-LSTM). And CifgEnable is set accordingly. + bool cifg_weights_all_or_none = ((m_InputToInputWeights && m_RecurrentToInputWeights && + !m_Parameters.m_CifgEnabled) || + (!m_InputToInputWeights && !m_RecurrentToInputWeights && + m_Parameters.m_CifgEnabled)); + if (!cifg_weights_all_or_none) + { + throw InvalidArgumentException("Input-Gate's parameters InputToInputWeights and RecurrentToInputWeights must " + "either both be present (regular LSTM) or both not present (CIFG-LSTM). In " + "addition CifgEnable must be set accordingly"); + } + + if ( m_CellToInputWeights ) + { + ValidateTensorNumDimNumElem(m_CellToInputWeights->GetTensorInfo(), 1, + n_cell, "CellToInputWeights"); + } + if ( m_CellToForgetWeights ) + { + ValidateTensorNumDimNumElem(m_CellToForgetWeights->GetTensorInfo(), 1, + n_cell, "CellToForgetWeights"); + } + if ( m_CellToOutputWeights ) + { + ValidateTensorNumDimNumElem(m_CellToOutputWeights->GetTensorInfo(), 1, + n_cell, "CellToOutputWeights"); + } + + // Making sure the peephole weights are there all or none. And PeepholeEnable is set accordingly. + bool peephole_weights_all_or_none = + (((m_CellToInputWeights || m_Parameters.m_CifgEnabled) && m_CellToForgetWeights + && m_CellToOutputWeights && m_Parameters.m_PeepholeEnabled) + || ( !m_CellToInputWeights && !m_CellToForgetWeights + && !m_CellToOutputWeights && !m_Parameters.m_PeepholeEnabled)); + if (!peephole_weights_all_or_none) + { + throw InvalidArgumentException("Invalid combination of peephole parameters"); + } + + // Make sure the input gate bias is present only when not a CIFG-LSTM. 
+ if (m_Parameters.m_CifgEnabled) + { + if (m_InputGateBias) + { + throw InvalidArgumentException("InputGateBias is present and CIFG-LSTM is enabled"); + } + } + else + { + if (!m_InputGateBias) + { + throw InvalidArgumentException("If CIFG-LSTM is disabled InputGateBias must be present."); + } + ValidateTensorNumDimNumElem(m_InputGateBias->GetTensorInfo(), 1, + n_cell, "InputGateBias"); + } + + ValidatePointer(m_ForgetGateBias, "Null pointer check", "ForgetGateBias"); + ValidateTensorNumDimNumElem(m_ForgetGateBias->GetTensorInfo(), 1, n_cell, "ForgetGateBias"); + + ValidatePointer(m_CellBias, "Null pointer check", "CellBias"); + ValidateTensorNumDimNumElem(m_CellBias->GetTensorInfo(), 1, n_cell, "CellBias"); + + ValidatePointer(m_OutputGateBias, "Null pointer check", "OutputGateBias"); + ValidateTensorNumDimNumElem(m_OutputGateBias->GetTensorInfo(), 1, n_cell, "OutputGateBias"); + + if (m_ProjectionWeights) + { + ValidateTensorNumDimNumElem(m_ProjectionWeights->GetTensorInfo(), 2, + (n_cell * n_output), "ProjectionWeights"); + } + if (m_ProjectionBias) + { + ValidateTensorNumDimNumElem(m_ProjectionBias->GetTensorInfo(), 1, n_output, "ProjectionBias"); + } + + // Making sure the projection tensors are consistent: + // 1) If projection weight is not present, then projection bias should not be + // present. + // 2) If projection weight is present, then projection bias is optional. + bool projecton_tensors_consistent = ((!m_ProjectionWeights && !m_ProjectionBias && + !m_Parameters.m_ProjectionEnabled) + || (m_ProjectionWeights && !m_ProjectionBias && + m_Parameters.m_ProjectionEnabled) + || (m_ProjectionWeights && m_ProjectionBias && + m_Parameters.m_ProjectionEnabled)); + if (!projecton_tensors_consistent) + { + throw InvalidArgumentException("Projection tensors are inconsistent."); + } + + // The four layer normalization weights either all have values or none of them have values. 
Additionally, if + // CIFG is used, input layer normalization weights tensor is omitted and the other layer normalization weights + // either all have values or none of them have values. Layer normalization is used when the values of all the + // layer normalization weights are present + if (m_InputLayerNormWeights) + { + ValidateTensorNumDimNumElem(m_InputLayerNormWeights->GetTensorInfo(), 1, n_cell, "InputLayerNormWeights"); + } + if (m_ForgetLayerNormWeights) + { + ValidateTensorNumDimNumElem(m_ForgetLayerNormWeights->GetTensorInfo(), 1, n_cell, "ForgetLayerNormWeights"); + } + if (m_CellLayerNormWeights) + { + ValidateTensorNumDimNumElem(m_CellLayerNormWeights->GetTensorInfo(), 1, n_cell, "CellLayerNormWeights"); + } + if (m_OutputLayerNormWeights) + { + ValidateTensorNumDimNumElem(m_OutputLayerNormWeights->GetTensorInfo(), 1, n_cell, "OutputLayerNormWeights"); + } + + + if (m_Parameters.m_LayerNormEnabled) + { + if (!m_Parameters.m_CifgEnabled) + { + if (!m_InputLayerNormWeights) + { + throw InvalidArgumentException("Layer normalisation is enabled and CIFG-LSTM is disabled but " + "InputLayerNormWeights are not present"); + } + ValidateTensorNumDimNumElem(m_InputLayerNormWeights->GetTensorInfo(), + 1, n_cell, "InputLayerNormWeights"); + } + else if (m_InputLayerNormWeights) + { + throw InvalidArgumentException("InputLayerNormWeights are present while CIFG is enabled"); + } + + ValidatePointer(m_ForgetLayerNormWeights, "Null pointer check layer normalisation enabled", + "ForgetLayerNormWeights"); + ValidateTensorNumDimNumElem(m_ForgetLayerNormWeights->GetTensorInfo(), 1, n_cell, "ForgetLayerNormWeights"); + + ValidatePointer(m_OutputLayerNormWeights, "Null pointer check layer normalisation enabled", + "OutputLayerNormWeights"); + ValidateTensorNumDimNumElem(m_OutputLayerNormWeights->GetTensorInfo(), 1, n_cell, "OutputLayerNormWeights"); + + ValidatePointer(m_CellLayerNormWeights, "Null pointer check layer normalisation enabled", + "CellLayerNormWeights"); + 
ValidateTensorNumDimNumElem(m_CellLayerNormWeights->GetTensorInfo(), 1, n_cell, "CellLayerNormWeights"); + } + else if (m_InputLayerNormWeights || m_ForgetLayerNormWeights || m_OutputLayerNormWeights || m_CellLayerNormWeights) + { + throw InvalidArgumentException("Layer normalisation is disabled but one or more layer normalisation weights " + "are present."); + } } void ConvertFp32ToFp16QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp index fa9e1cdf52..d241f7b24f 100644 --- a/src/backends/backendsCommon/WorkloadData.hpp +++ b/src/backends/backendsCommon/WorkloadData.hpp @@ -344,6 +344,10 @@ struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor> , m_OutputGateBias(nullptr) , m_ProjectionWeights(nullptr) , m_ProjectionBias(nullptr) + , m_InputLayerNormWeights(nullptr) + , m_ForgetLayerNormWeights(nullptr) + , m_CellLayerNormWeights(nullptr) + , m_OutputLayerNormWeights(nullptr) { } @@ -364,6 +368,10 @@ struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor> const ConstCpuTensorHandle* m_OutputGateBias; const ConstCpuTensorHandle* m_ProjectionWeights; const ConstCpuTensorHandle* m_ProjectionBias; + const ConstCpuTensorHandle* m_InputLayerNormWeights; + const ConstCpuTensorHandle* m_ForgetLayerNormWeights; + const ConstCpuTensorHandle* m_CellLayerNormWeights; + const ConstCpuTensorHandle* m_OutputLayerNormWeights; void Validate(const WorkloadInfo& workloadInfo) const; }; diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index b74b6afeb3..8ef5985fb3 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -396,6 +396,10 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, const TensorInfo* projectionBias = nullptr; const TensorInfo* cellToForgetWeights = nullptr; const 
TensorInfo* cellToOutputWeights = nullptr; + const TensorInfo* inputLayerNormWeights = nullptr; + const TensorInfo* forgetLayerNormWeights = nullptr; + const TensorInfo* cellLayerNormWeights = nullptr; + const TensorInfo* outputLayerNormWeights = nullptr; TensorInfo optInputToInputWeights; TensorInfo optRecurrentToInputWeights; @@ -405,6 +409,10 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, TensorInfo optProjectionBias; TensorInfo optCellToForgetWeights; TensorInfo optCellToOutputWeights; + TensorInfo optInputLayerNormWeights; + TensorInfo optForgetLayerNormWeights; + TensorInfo optCellLayerNormWeights; + TensorInfo optOutputLayerNormWeights; if(!descriptor.m_CifgEnabled) { @@ -449,6 +457,25 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, cellToOutputWeights = &optCellToOutputWeights; } + if(descriptor.m_LayerNormEnabled) + { + optInputLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_InputLayerNormWeights->GetTensorInfo(), dataType); + inputLayerNormWeights = &optInputLayerNormWeights; + + optForgetLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_ForgetLayerNormWeights->GetTensorInfo(), dataType); + forgetLayerNormWeights = &optForgetLayerNormWeights; + + optCellLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_CellLayerNormWeights->GetTensorInfo(), dataType); + cellLayerNormWeights = &optCellLayerNormWeights; + + optOutputLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_OutputLayerNormWeights->GetTensorInfo(), dataType); + outputLayerNormWeights = &optOutputLayerNormWeights; + } + result = layerSupportObject->IsLstmSupported( input, outputStateIn, @@ -475,7 +502,11 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, projectionBias, cellToForgetWeights, cellToOutputWeights, - reason); + reason, + inputLayerNormWeights, + forgetLayerNormWeights, + cellLayerNormWeights, + outputLayerNormWeights); break; } case 
LayerType::Maximum: diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp index ca39438fbf..56c0ab6b12 100644 --- a/src/backends/backendsCommon/test/LayerTests.cpp +++ b/src/backends/backendsCommon/test/LayerTests.cpp @@ -1665,6 +1665,153 @@ LayerTestResult<int16_t, 3> CopyViaSplitterInt16Test( return CopyViaSplitterTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0); } +void LstmUtilsZeroVectorTest() +{ + armnn::TensorInfo inputDesc({4}, armnn::DataType::Float32); + boost::multi_array<float, 1> input = MakeTensor<float, 1>(inputDesc, std::vector<float>( + {2., 3., 3., 4.})); + + boost::multi_array<float, 1> expectedOutput = MakeTensor<float, 1>(inputDesc, std::vector<float>( + {0., 0., 0., 0.})); + + return LstmUtilsZeroVectorTestImpl<armnn::DataType::Float32>(input, 4, expectedOutput); +} + +void LstmUtilsMeanStddevNormalizationNoneZeroInputTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 4; + armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>( + { 0.1f, 0.2f, 0.3f, 0.4f, //batch 0 + 0.9f, 1.0f, 1.1f, 1.2f })); //batch 1 + + boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>( + { -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f, //batch 0 + -1.34163153f, -0.447210163f, 0.447211236f, 1.3416326f })); //batch 1 + + return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input, + vecSize, batchSize, expectedOutput); +} + +void LstmUtilsMeanStddevNormalizationAllZeroInputTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 4; + armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + 0.0f, 0.0f, 0.0f, 0.0f })); //batch 1 + + 
boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + 0.0f, 0.0f, 0.0f, 0.0f })); //batch 1 + + return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input, + vecSize, batchSize, expectedOutput); +} + +void LstmUtilsMeanStddevNormalizationMixedZeroInputTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 4; + armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + 0.1f, 0.2f, 0.3f, 0.4f })); //batch 1 + + boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f })); //batch 1 + + return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input, + vecSize, batchSize, expectedOutput); +} + + +void LstmUtilsVectorBatchVectorCwiseProductTest() +{ + uint32_t batchSize = 4; + uint32_t vecSize = 29; + armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32); + boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>( + { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.1f, + 11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f, + 21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f, 0.0f})); + + armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>( + { /* batch 0 */ + 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.1f, + 11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f, + 21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f, 0.0f, + /* batch 1 */ + -1.1f, -2.2f, -3.3f, -4.4f, -5.5f, -6.6f, -7.7f, -8.8f, -9.9f, -10.1f, + 
-11.11f, -12.12f, -13.13f, -14.14f, -15.15f, -16.16f, -17.17f, -18.18f, -19.19f, -20.2f, + -21.21f, -22.22f, -23.23f, -24.24f, -25.25f, -26.26f, -27.27f, -28.28f, 0.0f, + /* batch 2 */ + 1.1f, -2.2f, 3.3f, -4.4f, 5.5f, -6.6f, 7.7f, -8.8f, 9.9f, -10.1f, + 11.11f, -12.12f, 13.13f, -14.14f, 15.15f, -16.16f, 17.17f, -18.18f, 19.19f, -20.2f, + 21.21f, -22.22f, 23.23f, -24.24f, 25.25f, -26.26f, 27.27f, -28.28f, 0.0f, + /* batch 3 */ + -1.1f, 2.2f, -3.3f, 4.4f, -5.5f, 6.6f, -7.7f, 8.8f, -9.9f, 10.1f, + -11.11f, 12.12f, -13.13f, 14.14f, -15.15f, 16.16f, -17.17f, 18.18f, -19.19f, 20.2f, + -21.21f, 22.22f, -23.23f, 24.24f, -25.25f, 26.26f, -27.27f, 28.28f, 0.0f})); + + // Expect output = input * output + output. + boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>( + { /* batch 0 */ + 1.210000f, 4.840000f, 10.889999f, 19.360001f, 30.250000f, 43.559998f, + 59.289997f, 77.440002f, 98.009995f, 102.010010f, 123.432091f, 146.894394f, + 172.396896f, 199.939606f, 229.522491f, 261.145599f, 294.808899f, 330.512421f, + 368.256134f, 408.040039f, 449.864075f, 493.728363f, 539.632874f, 587.577576f, + 637.562500f, 689.587585f, 743.652954f, 799.758423f, 0.000000f, + /* batch 1 */ + -1.210000f, -4.840000f, -10.889999f, -19.360001f, -30.250000f, -43.559998f, + -59.289997f, -77.440002f, -98.009995f, -102.010010f, -123.432091f, -146.894394f, + -172.396896f, -199.939606f, -229.522491f, -261.145599f, -294.808899f, -330.512421f, + -368.256134f, -408.040039f, -449.864075f, -493.728363f, -539.632874f, -587.577576f, + -637.562500f, -689.587585f, -743.652954f, -799.758423f, 0.000000f, + /* batch 2 */ + 1.210000f, -4.840000f, 10.889999f, -19.360001f, 30.250000f, -43.559998f, + 59.289997f, -77.440002f, 98.009995f, -102.010010f, 123.432091f, -146.894394f, + 172.396896f, -199.939606f, 229.522491f, -261.145599f, 294.808899f, -330.512421f, + 368.256134f, -408.040039f, 449.864075f, -493.728363f, 539.632874f, -587.577576f, + 637.562500f, -689.587585f, 
743.652954f, -799.758423f, 0.000000f, + /* batch 3 */ + -1.210000f, 4.840000f, -10.889999f, 19.360001f, -30.250000f, 43.559998f, + -59.289997f, 77.440002f, -98.009995f, 102.010010f, -123.432091f, 146.894394f, + -172.396896f, 199.939606f, -229.522491f, 261.145599f, -294.808899f, 330.512421f, + -368.256134f, 408.040039f, -449.864075f, 493.728363f, -539.632874f, 587.577576f, + -637.562500f, 689.587585f, -743.652954f, 799.758423f, 0.000000f})); + + return LstmUtilsVectorBatchVectorCwiseProductTestImpl<armnn::DataType::Float32>(vector, batchVector, + vecSize, batchSize, expectedOutput); +} + + +void LstmUtilsVectorBatchVectorAddTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 3; + armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32); + boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>( + { 0.0f, -0.5f, 1.0f})); + + armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>( + { 1.0f, 2.0f, 3.0f, //batch 0 + 4.0f, 5.0f, 6.0f})); //batch 1 + + boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>( + { 1.0f, 1.5f, 4.0f, + 4.0f, 4.5f, 7.0f})); + + return LstmUtilsVectorBatchVectorAddTestImpl<armnn::DataType::Float32>(vector, batchVector, + vecSize, batchSize, expectedOutput); +} + + LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) @@ -1721,6 +1868,25 @@ LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest( workloadFactory, memoryManager, input, expectedOutput); } + +LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + armnn::TensorInfo 
inputDesc({ 2, 5 }, armnn::DataType::Float32); + boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>( + {0.7f, 0.8f, 0.1f, 0.2f, 0.3f, //batch 0 + 0.3f, 0.2f, 0.9f, 0.8f, 0.1f})); //batch 1 + + armnn::TensorInfo outputDesc({ 2, 3 }, armnn::DataType::Float32); + boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>( + { 0.0244077f, 0.128027f, -0.00170918f, //batch 0 + -0.00692428f, 0.0848741f, 0.063445f})); //batch 1 + return LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<armnn::DataType::Float32>( + workloadFactory, memoryManager, input, expectedOutput); +} + + LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp index 405ccff35b..66324e104c 100644 --- a/src/backends/backendsCommon/test/LayerTests.hpp +++ b/src/backends/backendsCommon/test/LayerTests.hpp @@ -1458,6 +1458,13 @@ LayerTestResult<float, 4> PermuteFloat32ValueSet3Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +void LstmUtilsZeroVectorTest(); +void LstmUtilsMeanStddevNormalizationNoneZeroInputTest(); +void LstmUtilsMeanStddevNormalizationAllZeroInputTest(); +void LstmUtilsMeanStddevNormalizationMixedZeroInputTest(); +void LstmUtilsVectorBatchVectorCwiseProductTest(); +void LstmUtilsVectorBatchVectorAddTest(); + LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); @@ -1470,6 +1477,10 @@ LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest( armnn::IWorkloadFactory& workloadFactory, const 
armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); diff --git a/src/backends/backendsCommon/test/LstmTestImpl.hpp b/src/backends/backendsCommon/test/LstmTestImpl.hpp index dae9c8a3f1..2ed0a974fc 100644 --- a/src/backends/backendsCommon/test/LstmTestImpl.hpp +++ b/src/backends/backendsCommon/test/LstmTestImpl.hpp @@ -16,6 +16,119 @@ #include <backendsCommon/CpuTensorHandle.hpp> #include <backendsCommon/WorkloadFactory.hpp> +#include "reference/workloads/LstmUtils.hpp" + +//LstmUtils Tests +// TODO: Add tests for the remaining functions in LstmUtils.hpp + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +void LstmUtilsVectorBatchVectorAddTestImpl( + boost::multi_array<float, 1>& vec, + boost::multi_array<float, 2>& batchVec, + uint32_t vSize, + uint32_t nBatch, + boost::multi_array<float, 2>& expectedOutput ) +{ + float qScale = 0.0f; + int32_t qOffset = 0; + armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder and decoder + std::unique_ptr<armnn::Decoder<float>> vecDecoder = armnn::MakeDecoder<float>(tensorInfo, vec.data()); + std::unique_ptr<armnn::Decoder<float>> batchVecDecoder = armnn::MakeDecoder<float>(tensorInfo, batchVec.data()); + std::unique_ptr<armnn::Encoder<float>> batchVecEncoder = armnn::MakeEncoder<float>(tensorInfo, batchVec.data()); + + VectorBatchVectorAdd(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder); + + // check shape and compare values + BOOST_TEST(CompareTensors(batchVec, expectedOutput)); + + // check if iterator is back at start position 
+ batchVecEncoder->Set(1.0f); + BOOST_TEST(batchVec[0][0] == 1.0f); +} + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +void LstmUtilsZeroVectorTestImpl( + boost::multi_array<float, 1>& input, + uint32_t vSize, + boost::multi_array<float, 1>& expectedOutput) { + + float qScale = 0.0f; + int32_t qOffset = 0; + + armnn::TensorInfo tensorInfo({vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder for input + std::unique_ptr<armnn::Encoder<float>> outputEncoder = armnn::MakeEncoder<float>(tensorInfo, input.data()); + + // call ZeroVector + ZeroVector(*outputEncoder, vSize); + + // check shape and compare values + BOOST_TEST(CompareTensors(input, expectedOutput)); + + // check if iterator is back at start position + outputEncoder->Set(1.0f); + BOOST_TEST(input[0] == 1.0f); + +} + + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +void LstmUtilsMeanStddevNormalizationTestImpl( + boost::multi_array<float, 2>& input, + uint32_t vSize, + uint32_t nBatch, + boost::multi_array<float, 2>& expectedOutput) +{ + float qScale = 0.0f; + int32_t qOffset = 0; + armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder and decoder for input + std::unique_ptr<armnn::Decoder<float>> inputDecoder = armnn::MakeDecoder<float>(tensorInfo, input.data()); + std::unique_ptr<armnn::Encoder<float>> outputEncoder = armnn::MakeEncoder<float>(tensorInfo, input.data()); + + MeanStddevNormalization(*inputDecoder, *outputEncoder, vSize, nBatch, 1e-8f); + + // check shape and compare values + BOOST_TEST(CompareTensors(input, expectedOutput)); + + // check if iterator is back at start position + outputEncoder->Set(1.0f); + BOOST_TEST(input[0][0] == 1.0f); +} + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +void LstmUtilsVectorBatchVectorCwiseProductTestImpl( + boost::multi_array<float, 1>& vec, + boost::multi_array<float, 2>& batchVec, + uint32_t vSize, + 
uint32_t nBatch, + boost::multi_array<float, 2>& expectedOutput) +{ + float qScale = 0.0f; + int32_t qOffset = 0; + armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder and decoder + std::unique_ptr<armnn::Decoder<float>> vecDecoder = armnn::MakeDecoder<float>(tensorInfo, vec.data()); + std::unique_ptr<armnn::Decoder<float>> batchVecDecoder = armnn::MakeDecoder<float>(tensorInfo, batchVec.data()); + std::unique_ptr<armnn::Encoder<float>> batchVecEncoder = armnn::MakeEncoder<float>(tensorInfo, batchVec.data()); + + VectorBatchVectorCwiseProduct(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder); + + // check shape and compare values + BOOST_TEST(CompareTensors(batchVec, expectedOutput)); + + // check if iterator is back at start position + batchVecEncoder->Set(1.0f); + BOOST_TEST(batchVec[0][0] == 1.0f); +} + +// Lstm Layer tests: + template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> LayerTestResult<T, 2> LstmNoCifgNoPeepholeNoProjectionTestImpl( @@ -187,7 +300,6 @@ LstmNoCifgNoPeepholeNoProjectionTestImpl( data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; - data.m_CellToInputWeights = &cellToInputWeightsTensor; data.m_InputGateBias = &inputGateBiasTensor; data.m_ForgetGateBias = &forgetGateBiasTensor; data.m_CellBias = &cellBiasTensor; @@ -1157,3 +1269,275 @@ LayerTestResult<T, 2> LstmLayerWithCifgWithPeepholeNoProjectionTestImpl( return ret3; } + + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 2> +LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const boost::multi_array<T, 2>& input, + const boost::multi_array<T, 2>& outputExpected, + float qScale = 0.0f, 
+ int32_t qOffset = 0, + armnn::DataType constantDataType = armnn::DataType::Float32) +{ + unsigned int batchSize = 2; + unsigned int outputSize = 3; + unsigned int inputSize = 5; + unsigned numUnits = 4; + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, ArmnnType, qScale, qOffset); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, ArmnnType, qScale, qOffset); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, ArmnnType, qScale, qOffset); + + // Scratch buffer size without CIFG [batchSize, numUnits * 4] + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, ArmnnType, qScale, qOffset); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, ArmnnType, qScale, qOffset); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset); + + LayerTestResult<T, 2> ret(outputTensorInfo); + + std::vector<float> inputVector; + inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); + auto inputTensor = MakeTensor<float,2>(inputTensorInfo, inputVector); + + std::vector<float> cellStateInVector(batchSize * numUnits, 0.f); + auto cellStateInTensor = MakeTensor<float,2>(cellStateInTensorInfo, cellStateInVector); + + std::vector<float> outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor<float,2>(outputStateInTensorInfo, outputStateInVector); + + std::vector<float> scratchBufferVector(batchSize * numUnits * 4, 0.f); + auto scratchBufferTensor = MakeTensor<float,2>(scratchBufferTensorInfo, scratchBufferVector); + + std::vector<float> outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor<float,2>(outputStateOutTensorInfo, outputStateOutVector); + + std::vector<float> cellStateOutVector(batchSize * numUnits, 0.f); + auto cellStateOutTensor = MakeTensor<float,2>(cellStateOutTensorInfo, 
cellStateOutVector); + + std::vector<float> outputVector; + outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); + ret.outputExpected = MakeTensor<float, 2>(outputTensorInfo, outputVector); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::LstmQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::TensorInfo tensorInfo3({outputSize}, constantDataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4({numUnits}, constantDataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4x5({numUnits, inputSize}, constantDataType, qScale, qOffset); + armnn::TensorInfo 
tensorInfo4x3({numUnits, outputSize}, constantDataType, qScale, qOffset); + armnn::TensorInfo tensorInfo3x4({outputSize, numUnits}, constantDataType, qScale, qOffset); + + auto inputToInputWeights = + MakeTensor<float, 2>(tensorInfo4x5, { 0.5f, 0.6f, 0.7f, -0.8f, -0.9f, + 0.1f, 0.2f, 0.3f, -0.4f, 0.5f, + -0.8f, 0.7f, -0.6f, 0.5f, -0.4f, + -0.5f, -0.4f, -0.3f, -0.2f, -0.1f}); //{numUnits, inputSize} + + auto inputToForgetWeights = + MakeTensor<float, 2>(tensorInfo4x5, {-0.6f, -0.1f, 0.3f, 0.2f, 0.9f, + -0.5f, -0.2f, -0.4f, 0.3f, -0.8f, + -0.4f, 0.3f, -0.5f, -0.4f, -0.6f, + 0.3f, -0.4f, -0.6f, -0.5f, -0.5f}); //{numUnits, inputSize} + + auto inputToCellWeights = + MakeTensor<float, 2>(tensorInfo4x5, {-0.4f, -0.3f, -0.2f, -0.1f, -0.5f, + 0.5f, -0.2f, -0.3f, -0.2f, -0.6f, + 0.6f, -0.1f, -0.4f, -0.3f, -0.7f, + 0.7f, -0.9f, -0.5f, 0.8f, 0.6f}); //{numUnits, inputSize} + + auto inputToOutputWeights = + MakeTensor<float, 2>(tensorInfo4x5, {-0.8f, -0.4f, -0.2f, -0.9f, -0.1f, + -0.7f, 0.3f, -0.3f, -0.8f, -0.2f, + 0.6f, -0.2f, 0.4f, -0.7f, -0.3f, + -0.5f, 0.1f, 0.5f, -0.6f, -0.4f}); //{numUnits, inputSize} + + auto inputGateBias = + MakeTensor<float, 1>(tensorInfo4, {0.03f, 0.15f, 0.22f, 0.38f}); //{numUnits} + + auto forgetGateBias = + MakeTensor<float, 1>(tensorInfo4, {0.1f, -0.3f, -0.2f, 0.1f}); //{numUnits} + + auto cellBias = + MakeTensor<float, 1>(tensorInfo4, {-0.05f, 0.72f, 0.25f, 0.08f}); //{numUnits} + + auto outputGateBias = + MakeTensor<float, 1>(tensorInfo4, {0.05f, -0.01f, 0.2f, 0.1f}); //{numUnits} + + auto recurrentToInputWeights = + MakeTensor<float, 2>(tensorInfo4x3, {-0.2f, -0.3f, 0.4f, + 0.1f, -0.5f, 0.9f, + -0.2f, -0.3f, -0.7f, + 0.05f, -0.2f, -0.6f}); //{numUnits, outputSize} + + auto recurrentToCellWeights = + MakeTensor<float, 2>(tensorInfo4x3, {-0.3f, 0.2f, 0.1f, + -0.3f, 0.8f, -0.08f, + -0.2f, 0.3f, 0.8f, + -0.6f, -0.1f, 0.2f}); //{numUnits, outputSize} + + auto recurrentToForgetWeights = + MakeTensor<float, 2>(tensorInfo4x3, {-0.5f, -0.3f, -0.5f, + 
-0.2f, 0.6f, 0.4f, + 0.9f, 0.3f, -0.1f, + 0.2f, 0.5f, 0.2f}); //{numUnits, outputSize} + + auto recurrentToOutputWeights = + MakeTensor<float, 2>(tensorInfo4x3, { 0.3f, -0.1f, 0.1f, + -0.2f, -0.5f, -0.7f, + -0.2f, -0.6f, -0.1f, + -0.4f, -0.7f, -0.2f}); //{numUnits, outputSize} + + auto cellToInputWeights = + MakeTensor<float, 1>(tensorInfo4, {0.05f, 0.1f, 0.25f, 0.15f}); //{numUnits} + + auto cellToForgetWeights = + MakeTensor<float, 1>(tensorInfo4, {-0.02f, -0.15f, -0.25f, -0.03f}); //{numUnits} + + auto cellToOutputWeights = + MakeTensor<float, 1>(tensorInfo4, {0.1f, -0.1f, -0.5f, 0.05f}); //{numUnits} + + auto projectionWeights = + MakeTensor<float, 2>(tensorInfo3x4, + {-0.1f, 0.2f, 0.01f, -0.2f, + 0.1f, 0.5f, 0.3f, 0.08f, + 0.07f, 0.2f, -0.4f, 0.2f}); //{outputSize, numUnits} + + std::vector<float> projectionBiasVector(outputSize, 0.f); + auto projectionBias = MakeTensor<float,1>(tensorInfo3, projectionBiasVector); //{outputSize} + + auto inputLayerNormWeights = + MakeTensor<float, 1>(tensorInfo4, {0.1f, 0.2f, 0.3f, 0.5f}); //{numUnits} + + auto forgetLayerNormWeights = + MakeTensor<float, 1>(tensorInfo4, {0.2f, 0.2f, 0.4f, 0.3f}); //{numUnits} + + auto cellLayerNormWeights = + MakeTensor<float, 1>(tensorInfo4, {0.7f, 0.2f, 0.3f, 0.8f}); //{numUnits} + + auto outputLayerNormWeights = + MakeTensor<float, 1>(tensorInfo4, {0.6f, 0.2f, 0.2f, 0.5f}); //{numUnits} + + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo4x3); + 
armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo3x4); + armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo3); + + armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(tensorInfo4); + + AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); + AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, 
&cellBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); + AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]); + + AllocateAndCopyDataToITensorHandle(&inputLayerNormWeightsTensor, &inputLayerNormWeights[0]); + AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]); + AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + data.m_ProjectionWeights = &projectionWeightsTensor; + data.m_ProjectionBias = &projectionBiasTensor; + + data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor; + data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor; + data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor; + 
data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor; + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = true; + data.m_Parameters.m_ProjectionEnabled = true; + data.m_Parameters.m_LayerNormEnabled = true; + + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info); + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + return ret; + +}
\ No newline at end of file diff --git a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp index 7c7af2ddce..c6960986b3 100644 --- a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp +++ b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp @@ -453,22 +453,139 @@ BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements) BOOST_AUTO_TEST_CASE(LstmQueueDescriptor_Validate) { - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 1, 2 }; - unsigned int outputShape[] = { 1 }; - - inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(1, outputShape, armnn::DataType::Float32); - - LstmQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - - AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - - BOOST_CHECK_THROW(invalidData.Validate(invalidInfo), armnn::InvalidArgumentException); + armnn::DataType dataType = armnn::DataType::Float32; + + float qScale = 0.0f; + int32_t qOffset = 0; + + unsigned int batchSize = 2; + unsigned int outputSize = 3; + unsigned int inputSize = 5; + unsigned numUnits = 4; + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, dataType, qScale, qOffset ); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, dataType, qScale, qOffset); + + // Scratch buffer size with CIFG [batchSize, numUnits * 4] + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, dataType, qScale, qOffset); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, dataType, qScale, qOffset); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo 
outputTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset); + + armnn::TensorInfo tensorInfo3({outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4({numUnits}, dataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4x5({numUnits, inputSize}, dataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4x3({numUnits, outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo tensorInfo3x4({outputSize, numUnits}, dataType, qScale, qOffset); + + LstmQueueDescriptor data; + WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, nullptr); + AddInputToWorkload(data, info, outputStateInTensorInfo, nullptr); + AddInputToWorkload(data, info, cellStateInTensorInfo, nullptr); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, nullptr); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, nullptr); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, nullptr); + // AddOutputToWorkload(data, info, outputTensorInfo, nullptr); is left out + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle 
cellToForgetWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo3x4); + armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo3); + armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(tensorInfo4); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + data.m_ProjectionWeights = &projectionWeightsTensor; + data.m_ProjectionBias = &projectionBiasTensor; + + data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor; + data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor; + data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor; + data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor; + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = true; + data.m_Parameters.m_ProjectionEnabled = true; + 
data.m_Parameters.m_LayerNormEnabled = true; + + // check wrong number of outputs + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + AddOutputToWorkload(data, info, outputTensorInfo, nullptr); + + // check wrong cifg parameter configuration + data.m_Parameters.m_CifgEnabled = true; + armnn::TensorInfo scratchBufferTensorInfo2({batchSize, numUnits * 3}, dataType, qScale, qOffset); + SetWorkloadOutput(data, info, 0, scratchBufferTensorInfo2, nullptr); + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_Parameters.m_CifgEnabled = false; + SetWorkloadOutput(data, info, 0, scratchBufferTensorInfo, nullptr); + + // check wrong inputGateBias configuration + data.m_InputGateBias = nullptr; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_InputGateBias = &inputGateBiasTensor; + + // check inconsistant projection parameters + data.m_Parameters.m_ProjectionEnabled = false; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_Parameters.m_ProjectionEnabled = true; + data.m_ProjectionWeights = nullptr; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_ProjectionWeights = &projectionWeightsTensor; + + // check missing input layer normalisation weights + data.m_InputLayerNormWeights = nullptr; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor; + + // layer norm disabled but normalisation weights are present + data.m_Parameters.m_LayerNormEnabled = false; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_Parameters.m_LayerNormEnabled = true; + + // check invalid outputTensor shape + armnn::TensorInfo incorrectOutputTensorInfo({batchSize, outputSize + 1}, dataType, qScale, qOffset); + SetWorkloadOutput(data, info, 3, incorrectOutputTensorInfo, nullptr); + BOOST_CHECK_THROW(data.Validate(info), 
armnn::InvalidArgumentException); + SetWorkloadOutput(data, info, 3, outputTensorInfo, nullptr); + + // check correct configuration + BOOST_CHECK_NO_THROW(data.Validate(info)); } BOOST_AUTO_TEST_SUITE_END() |