author      Jan Eilers <jan.eilers@arm.com>  2019-06-26 13:10:09 +0100
committer   Jan Eilers <jan.eilers@arm.com>  2019-07-02 09:59:37 +0000
commit      38e05bd2836b1b65b440330a9c283038ba4192c3 (patch)
tree        c232f71ce6a101c70ed65e046678f7b22593dbe4
parent      d0c0cc3e27f1ada9df167d3b9ff248be432d16e1 (diff)
download    armnn-38e05bd2836b1b65b440330a9c283038ba4192c3.tar.gz
IVGCVSW-3236 Extend Ref LSTM with layer normalization support
* Add descriptor values
* Update lstm queue descriptor validate function
* Update lstm workload
* Update IsLstmSupported (Cl and Ref), LayerSupportBase, ILayerSupport
* Update lstm layer
* Add unit tests

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: I932175d550facfb342325051eaa7bd2084ebdc18
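For orientation, here is a minimal sketch of how a caller could enable the new option once this patch is in place. It assumes the usual armnn::INetwork::AddLstmLayer entry point; the weight tensors are placeholders, not values from this patch:

    // Hedged usage sketch: enable layer normalization on an LSTM layer.
    armnn::LstmDescriptor descriptor;
    descriptor.m_ActivationFunc    = 4;     // tanh
    descriptor.m_CifgEnabled       = false; // input gate present, so input layer norm weights are required
    descriptor.m_PeepholeEnabled   = true;
    descriptor.m_ProjectionEnabled = true;
    descriptor.m_LayerNormEnabled  = true;  // new flag added by this patch

    armnn::LstmInputParams params;
    // ... set the mandatory gate, recurrent, and bias tensors as before ...
    // New 1D [num_units] layer normalization weights added by this patch:
    params.m_InputLayerNormWeights  = &inputLayerNormWeights;  // must be omitted when CIFG is enabled
    params.m_ForgetLayerNormWeights = &forgetLayerNormWeights;
    params.m_CellLayerNormWeights   = &cellLayerNormWeights;
    params.m_OutputLayerNormWeights = &outputLayerNormWeights;

    armnn::IConnectableLayer* lstm = network->AddLstmLayer(descriptor, params, "lstm");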
-rw-r--r--  include/armnn/Descriptors.hpp  3
-rw-r--r--  include/armnn/ILayerSupport.hpp  6
-rw-r--r--  include/armnn/LstmParams.hpp  8
-rw-r--r--  src/armnn/layers/LstmLayer.cpp  81
-rw-r--r--  src/armnn/layers/LstmLayer.hpp  13
-rw-r--r--  src/backends/backendsCommon/LayerSupportBase.cpp  6
-rw-r--r--  src/backends/backendsCommon/LayerSupportBase.hpp  6
-rw-r--r--  src/backends/backendsCommon/WorkloadData.cpp  271
-rw-r--r--  src/backends/backendsCommon/WorkloadData.hpp  8
-rw-r--r--  src/backends/backendsCommon/WorkloadFactory.cpp  33
-rw-r--r--  src/backends/backendsCommon/test/LayerTests.cpp  166
-rw-r--r--  src/backends/backendsCommon/test/LayerTests.hpp  11
-rw-r--r--  src/backends/backendsCommon/test/LstmTestImpl.hpp  386
-rw-r--r--  src/backends/backendsCommon/test/WorkloadDataValidation.cpp  149
-rw-r--r--  src/backends/cl/ClLayerSupport.cpp  6
-rw-r--r--  src/backends/cl/ClLayerSupport.hpp  6
-rw-r--r--  src/backends/reference/RefLayerSupport.cpp  10
-rw-r--r--  src/backends/reference/RefLayerSupport.hpp  6
-rw-r--r--  src/backends/reference/backend.mk  1
-rw-r--r--  src/backends/reference/test/RefLayerTests.cpp  14
-rw-r--r--  src/backends/reference/workloads/CMakeLists.txt  1
-rw-r--r--  src/backends/reference/workloads/LstmUtils.cpp  307
-rw-r--r--  src/backends/reference/workloads/LstmUtils.hpp  204
-rw-r--r--  src/backends/reference/workloads/RefLstmWorkload.cpp  100
-rw-r--r--  src/backends/reference/workloads/RefLstmWorkload.hpp  6
25 files changed, 1597 insertions, 211 deletions
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 85e8b56fed..9175239aa8 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -589,6 +589,7 @@ struct LstmDescriptor
, m_CifgEnabled(true)
, m_PeepholeEnabled(false)
, m_ProjectionEnabled(false)
+ , m_LayerNormEnabled(false)
{}
/// @brief The activation function to use.
@@ -604,6 +605,8 @@ struct LstmDescriptor
bool m_PeepholeEnabled;
/// Enable/disable the projection layer.
bool m_ProjectionEnabled;
+ /// Enable/disable layer normalization.
+ bool m_LayerNormEnabled;
};
/// A MeanDescriptor for the MeanLayer.
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index bf0ac90c59..635b9cc663 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -170,7 +170,11 @@ public:
const TensorInfo* projectionBias,
const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional(),
+ const TensorInfo* inputLayerNormWeights = nullptr,
+ const TensorInfo* forgetLayerNormWeights = nullptr,
+ const TensorInfo* cellLayerNormWeights = nullptr,
+ const TensorInfo* outputLayerNormWeights = nullptr) const = 0;
virtual bool IsMaximumSupported(const TensorInfo& input0,
const TensorInfo& input1,
diff --git a/include/armnn/LstmParams.hpp b/include/armnn/LstmParams.hpp
index c4f38f0067..a7c57c78b2 100644
--- a/include/armnn/LstmParams.hpp
+++ b/include/armnn/LstmParams.hpp
@@ -29,6 +29,10 @@ struct LstmInputParams
, m_OutputGateBias(nullptr)
, m_ProjectionWeights(nullptr)
, m_ProjectionBias(nullptr)
+ , m_InputLayerNormWeights(nullptr)
+ , m_ForgetLayerNormWeights(nullptr)
+ , m_CellLayerNormWeights(nullptr)
+ , m_OutputLayerNormWeights(nullptr)
{
}
@@ -49,6 +53,10 @@ struct LstmInputParams
const ConstTensor* m_OutputGateBias;
const ConstTensor* m_ProjectionWeights;
const ConstTensor* m_ProjectionBias;
+ const ConstTensor* m_InputLayerNormWeights;
+ const ConstTensor* m_ForgetLayerNormWeights;
+ const ConstTensor* m_CellLayerNormWeights;
+ const ConstTensor* m_OutputLayerNormWeights;
};
} // namespace armnn
diff --git a/src/armnn/layers/LstmLayer.cpp b/src/armnn/layers/LstmLayer.cpp
index 2b99f284e8..4012839dfe 100644
--- a/src/armnn/layers/LstmLayer.cpp
+++ b/src/armnn/layers/LstmLayer.cpp
@@ -55,6 +55,19 @@ std::unique_ptr<IWorkload> LstmLayer::CreateWorkload(const Graph& graph, const I
descriptor.m_CellToForgetWeights = m_PeepholeParameters.m_CellToForgetWeights.get();
descriptor.m_CellToOutputWeights = m_PeepholeParameters.m_CellToOutputWeights.get();
}
+
+ // Layer normalisation parameters
+ if(m_Param.m_LayerNormEnabled)
+ {
+ if (!m_Param.m_CifgEnabled)
+ {
+ descriptor.m_InputLayerNormWeights = m_LayerNormParameters.m_InputLayerNormWeights.get();
+ }
+ descriptor.m_ForgetLayerNormWeights = m_LayerNormParameters.m_ForgetLayerNormWeights.get();
+ descriptor.m_CellLayerNormWeights = m_LayerNormParameters.m_CellLayerNormWeights.get();
+ descriptor.m_OutputLayerNormWeights = m_LayerNormParameters.m_OutputLayerNormWeights.get();
+ }
+
return factory.CreateLstm(descriptor, PrepInfoAndDesc(descriptor, graph));
}
@@ -110,6 +123,18 @@ LstmLayer* LstmLayer::Clone(Graph& graph) const
std::make_unique<ScopedCpuTensorHandle>(*m_PeepholeParameters.m_CellToOutputWeights) : nullptr;
}
+ if (m_Param.m_LayerNormEnabled)
+ {
+ layer->m_LayerNormParameters.m_InputLayerNormWeights = m_LayerNormParameters.m_InputLayerNormWeights ?
+ std::make_unique<ScopedCpuTensorHandle>(*m_LayerNormParameters.m_InputLayerNormWeights) : nullptr;
+ layer->m_LayerNormParameters.m_ForgetLayerNormWeights = m_LayerNormParameters.m_ForgetLayerNormWeights ?
+ std::make_unique<ScopedCpuTensorHandle>(*m_LayerNormParameters.m_ForgetLayerNormWeights) : nullptr;
+ layer->m_LayerNormParameters.m_CellLayerNormWeights = m_LayerNormParameters.m_CellLayerNormWeights ?
+ std::make_unique<ScopedCpuTensorHandle>(*m_LayerNormParameters.m_CellLayerNormWeights) : nullptr;
+ layer->m_LayerNormParameters.m_OutputLayerNormWeights = m_LayerNormParameters.m_OutputLayerNormWeights ?
+ std::make_unique<ScopedCpuTensorHandle>(*m_LayerNormParameters.m_OutputLayerNormWeights) : nullptr;
+ }
+
return std::move(layer);
}
@@ -220,6 +245,21 @@ void LstmLayer::ValidateTensorShapesFromInputs()
"LstmLayer: TensorShape set on OutputSlot[3] does not match the inferred shape.",
GetOutputSlot(3).GetTensorInfo().GetShape(),
inferredShapes[3]);
+
+ if (m_Param.m_LayerNormEnabled)
+ {
+ if(!m_Param.m_CifgEnabled)
+ {
+ BOOST_ASSERT_MSG(m_LayerNormParameters.m_InputLayerNormWeights != nullptr,
+ "LstmLayer: m_LayerNormParameters.m_inputLayerNormWeights should not be null.");
+ }
+ BOOST_ASSERT_MSG(m_LayerNormParameters.m_ForgetLayerNormWeights != nullptr,
+ "LstmLayer: m_LayerNormParameters.m_forgetLayerNormWeights should not be null.");
+ BOOST_ASSERT_MSG(m_LayerNormParameters.m_CellLayerNormWeights != nullptr,
+ "LstmLayer: m_LayerNormParameters.m_cellLayerNormWeights should not be null.");
+ BOOST_ASSERT_MSG(m_LayerNormParameters.m_OutputLayerNormWeights != nullptr,
+ "LstmLayer: m_LayerNormParameters.m_outputLayerNormWeights should not be null.");
+ }
}
Layer::ConstantTensors LstmLayer::GetConstantTensorsByRef()
@@ -246,7 +286,13 @@ Layer::ConstantTensors LstmLayer::GetConstantTensorsByRef()
// Peephole parameters
m_PeepholeParameters.m_CellToForgetWeights,
- m_PeepholeParameters.m_CellToOutputWeights};
+ m_PeepholeParameters.m_CellToOutputWeights,
+
+ // Layer normalisation parameters
+ m_LayerNormParameters.m_InputLayerNormWeights,
+ m_LayerNormParameters.m_ForgetLayerNormWeights,
+ m_LayerNormParameters.m_CellLayerNormWeights,
+ m_LayerNormParameters.m_OutputLayerNormWeights};
}
void LstmLayer::Accept(ILayerVisitor& visitor) const
@@ -392,6 +438,39 @@ void LstmLayer::Accept(ILayerVisitor& visitor) const
projectionBiasTensor = projectionBiasTensorCopy;
inputParams.m_ProjectionBias = &projectionBiasTensor;
}
+ ConstTensor inputLayerNormTensor;
+ if (m_LayerNormParameters.m_InputLayerNormWeights != nullptr)
+ {
+ ConstTensor inputLayerNormTensorCopy(m_LayerNormParameters.m_InputLayerNormWeights->GetTensorInfo(),
+ m_LayerNormParameters.m_InputLayerNormWeights->Map(true));
+ inputLayerNormTensor = inputLayerNormTensorCopy;
+ inputParams.m_InputLayerNormWeights = &inputLayerNormTensor;
+ }
+ ConstTensor forgetLayerNormTensor;
+ if (m_LayerNormParameters.m_ForgetLayerNormWeights != nullptr)
+ {
+ ConstTensor forgetLayerNormTensorCopy(m_LayerNormParameters.m_ForgetLayerNormWeights->GetTensorInfo(),
+ m_LayerNormParameters.m_ForgetLayerNormWeights->Map(true));
+ forgetLayerNormTensor = forgetLayerNormTensorCopy;
+ inputParams.m_ForgetLayerNormWeights = &forgetLayerNormTensor;
+ }
+ ConstTensor cellLayerNormTensor;
+ if (m_LayerNormParameters.m_CellLayerNormWeights != nullptr)
+ {
+ ConstTensor cellLayerNormTensorCopy(m_LayerNormParameters.m_CellLayerNormWeights->GetTensorInfo(),
+ m_LayerNormParameters.m_CellLayerNormWeights->Map(true));
+ cellLayerNormTensor = cellLayerNormTensorCopy;
+ inputParams.m_CellLayerNormWeights = &cellLayerNormTensor;
+ }
+ ConstTensor outputLayerNormTensor;
+ if (m_LayerNormParameters.m_OutputLayerNormWeights != nullptr)
+ {
+ ConstTensor outputLayerNormTensorCopy(m_LayerNormParameters.m_OutputLayerNormWeights->GetTensorInfo(),
+ m_LayerNormParameters.m_OutputLayerNormWeights->Map(true));
+ outputLayerNormTensor = outputLayerNormTensorCopy;
+ inputParams.m_OutputLayerNormWeights = &outputLayerNormTensor;
+ }
+
visitor.VisitLstmLayer(this, GetParameters(), inputParams, GetName());
}
diff --git a/src/armnn/layers/LstmLayer.hpp b/src/armnn/layers/LstmLayer.hpp
index bfea5d8232..584d8e2547 100644
--- a/src/armnn/layers/LstmLayer.hpp
+++ b/src/armnn/layers/LstmLayer.hpp
@@ -11,6 +11,18 @@ namespace armnn
class ScopedCpuTensorHandle;
+struct LstmOptLayerNormParameters
+{
+ /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+ std::unique_ptr<ScopedCpuTensorHandle> m_InputLayerNormWeights;
+ /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+ std::unique_ptr<ScopedCpuTensorHandle> m_ForgetLayerNormWeights;
+ /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+ std::unique_ptr<ScopedCpuTensorHandle> m_CellLayerNormWeights;
+ /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+ std::unique_ptr<ScopedCpuTensorHandle> m_OutputLayerNormWeights;
+};
+
struct LstmOptCifgParameters
{
/// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units].
@@ -70,6 +82,7 @@ public:
LstmOptCifgParameters m_CifgParameters;
LstmOptProjectionParameters m_ProjectionParameters;
LstmOptPeepholeParameters m_PeepholeParameters;
+ LstmOptLayerNormParameters m_LayerNormParameters;
/// Makes a workload for the LSTM type.
/// @param [in] graph The graph where this layer can be found.
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 6c25f87c9a..4488e25c9c 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -243,7 +243,11 @@ bool LayerSupportBase::IsLstmSupported(const TensorInfo& input,
const TensorInfo* projectionBias,
const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights,
- Optional<std::string&> reasonIfUnsupported) const
+ Optional<std::string&> reasonIfUnsupported,
+ const TensorInfo* inputLayerNormWeights,
+ const TensorInfo* forgetLayerNormWeights,
+ const TensorInfo* cellLayerNormWeights,
+ const TensorInfo* outputLayerNormWeights) const
{
return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
}
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index 7f63ccfbb1..8abd975922 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -157,7 +157,11 @@ public:
const TensorInfo* projectionBias,
const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional(),
+ const TensorInfo* inputLayerNormWeights = nullptr,
+ const TensorInfo* forgetLayerNormWeights = nullptr,
+ const TensorInfo* cellLayerNormWeights = nullptr,
+ const TensorInfo* outputLayerNormWeights = nullptr) const override;
bool IsMaximumSupported(const TensorInfo& input0,
const TensorInfo& input1,
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index e7915dd40b..3766f5f7ca 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -114,6 +114,30 @@ void ValidateTensorNumDimensions(const TensorInfo& tensor,
}
//---------------------------------------------------------------
+void ValidateTensorNumElements(const TensorInfo& tensor,
+ std::string const& descName,
+ unsigned int numElements,
+ std::string const& tensorName)
+{
+ if (tensor.GetNumElements() != numElements)
+ {
+ throw InvalidArgumentException(descName + ": Expected " + to_string(numElements) + " but got " +
+ to_string(tensor.GetNumElements()) + " elements for " +
+ tensorName + " tensor.");
+ }
+}
+
+//---------------------------------------------------------------
+void ValidateTensorNumDimNumElem(const TensorInfo& tensorInfo,
+ unsigned int numDimension,
+ unsigned int numElements,
+ std::string const& tensorName)
+{
+ ValidateTensorNumDimensions(tensorInfo, "ValidateTensorNumDimNumElem: NumDimensionCheck", numDimension, tensorName);
+ ValidateTensorNumElements(tensorInfo, "ValidateTensorNumDimNumElem: NumElementsCheck", numElements, tensorName);
+}
+
+//---------------------------------------------------------------
void ValidateTensorDataType(const TensorInfo& tensor, DataType dataType,
const std::string& descName, std::string const& tensorName)
{
@@ -1238,22 +1262,257 @@ void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
void LstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
{
- ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "LstmQueueDescriptor", 2, "input");
- ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "LstmQueueDescriptor", 2, "output");
-
std::vector<DataType> supportedTypes = {
DataType::Float16,
DataType::Float32,
DataType::QuantisedSymm16
};
+ // ported from android/ml/nn/common/operations/LSTM.cpp CheckInputTensorDimensions()
+ // check that the data type of the first input is supported and that all other inputs and outputs match it
ValidateDataTypes(workloadInfo.m_InputTensorInfos[0],
supportedTypes,
"LstmQueueDescriptor");
+ // type matches all other inputs
+ for (uint32_t i = 1; i < workloadInfo.m_InputTensorInfos.size(); ++i)
+ {
+ ValidateTensorDataTypesMatch(workloadInfo.m_InputTensorInfos[0],
+ workloadInfo.m_InputTensorInfos[i],
+ "LstmQueueDescriptor",
+ "InputTensor[0]",
+ "InputTensor[" + std::to_string(i) + "]");
+ }
+ // type matches all other outputs
+ for (uint32_t i = 0; i < workloadInfo.m_OutputTensorInfos.size(); ++i)
+ {
+ ValidateTensorDataTypesMatch(workloadInfo.m_InputTensorInfos[0],
+ workloadInfo.m_OutputTensorInfos[i],
+ "LstmQueueDescriptor",
+ "InputTensor[0]",
+ "OutputTensor[" + std::to_string(i) + "]");
+ }
- ValidateDataTypes(workloadInfo.m_OutputTensorInfos[0],
- supportedTypes,
- "LstmQueueDescriptor");
+ // TODO: check clipping parameter is valid
+
+ // Inferring batch size, number of outputs and number of cells from the inputs.
+ // TODO: figure out if there is a way to make sure the specific inputs are at that index of workloadInfo
+ const uint32_t n_input = workloadInfo.m_InputTensorInfos[0].GetShape()[1];
+ const uint32_t n_batch = workloadInfo.m_InputTensorInfos[0].GetShape()[0];
+ ValidatePointer(m_InputToOutputWeights, "Null pointer check", "InputToOutputWeights");
+ const uint32_t n_cell = m_InputToOutputWeights->GetShape()[0];
+ ValidatePointer(m_RecurrentToOutputWeights, "Null pointer check", "RecurrentToOutputWeights");
+ const uint32_t n_output = m_RecurrentToOutputWeights->GetShape()[1];
+
+ // check dimensions of all inputs and outputs
+ if (workloadInfo.m_InputTensorInfos.size() != 3)
+ {
+ throw InvalidArgumentException("Invalid number of inputs.");
+ }
+ if (workloadInfo.m_OutputTensorInfos.size() != 4)
+ {
+ throw InvalidArgumentException("Invalid number of outputs.");
+ }
+ // input tensor
+ ValidateTensorNumDimNumElem( workloadInfo.m_InputTensorInfos[0], 2, (n_batch * n_input),
+ "LstmQueueDescriptor input[0]");
+ // outputStateInTensor
+ ValidateTensorNumDimNumElem( workloadInfo.m_InputTensorInfos[1], 2, (n_batch * n_output),
+ "LstmQueueDescriptor input[1]");
+ // cellStateInTensor
+ ValidateTensorNumDimNumElem( workloadInfo.m_InputTensorInfos[2], 2, (n_batch * n_cell),
+ "LstmQueueDescriptor input[2]");
+ // scratchBufferTensor
+ unsigned int scratchBufferSize = m_Parameters.m_CifgEnabled ? n_cell * 3 : n_cell * 4;
+ ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[0], 2, (n_batch * scratchBufferSize),
+ "LstmQueueDescriptor output[0]");
+ // outputStateOutTensor
+ ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[1], 2, (n_batch * n_output),
+ "LstmQueueDescriptor output[1]");
+ // cellStateOutTensor
+ ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[2], 2, (n_batch * n_cell),
+ "LstmQueueDescriptor output[2]");
+ // outputTensor
+ ValidateTensorNumDimNumElem( workloadInfo.m_OutputTensorInfos[3], 2, (n_batch * n_output),
+ "LstmQueueDescriptor output[3]");
+
+
+ // check that the dimensions of the inputs/outputs and the QueueDescriptor data match each other
+ if ( m_InputToInputWeights )
+ {
+ ValidateTensorNumDimNumElem(m_InputToInputWeights->GetTensorInfo(), 2,
+ (n_cell * n_input), "InputLayerNormWeights");
+ }
+
+ ValidatePointer(m_InputToForgetWeights, "Null pointer check", "InputToForgetWeights");
+ ValidateTensorNumDimNumElem(m_InputToForgetWeights->GetTensorInfo(), 2,
+ (n_cell * n_input), "InputToForgetWeights");
+
+ ValidatePointer(m_InputToCellWeights, "Null pointer check", "InputToCellWeights");
+ ValidateTensorNumDimNumElem(m_InputToCellWeights->GetTensorInfo(), 2,
+ (n_cell * n_input), "InputToCellWeights");
+
+ if ( m_RecurrentToInputWeights )
+ {
+ ValidateTensorNumDimNumElem(m_RecurrentToInputWeights->GetTensorInfo(), 2,
+ (n_cell * n_output), "RecurrentToInputWeights");
+ }
+
+ ValidatePointer(m_RecurrentToForgetWeights, "Null pointer check", "RecurrentToForgetWeights");
+ ValidateTensorNumDimNumElem(m_RecurrentToForgetWeights->GetTensorInfo(), 2,
+ (n_cell * n_output), "RecurrentToForgetWeights");
+
+ ValidatePointer(m_RecurrentToCellWeights, "Null pointer check", "RecurrentToCellWeights");
+ ValidateTensorNumDimNumElem(m_RecurrentToCellWeights->GetTensorInfo(), 2,
+ (n_cell * n_output), "RecurrentToCellWeights");
+
+ // Make sure the input gate's parameters are either both present (regular
+ // LSTM) or both absent (CIFG-LSTM), and that m_CifgEnabled is set accordingly.
+ bool cifg_weights_all_or_none = ((m_InputToInputWeights && m_RecurrentToInputWeights &&
+ !m_Parameters.m_CifgEnabled) ||
+ (!m_InputToInputWeights && !m_RecurrentToInputWeights &&
+ m_Parameters.m_CifgEnabled));
+ if (!cifg_weights_all_or_none)
+ {
+ throw InvalidArgumentException("Input-Gate's parameters InputToInputWeights and RecurrentToInputWeights must "
+ "either both be present (regular LSTM) or both not present (CIFG-LSTM). In "
+ "addition CifgEnable must be set accordingly");
+ }
+
+ if ( m_CellToInputWeights )
+ {
+ ValidateTensorNumDimNumElem(m_CellToInputWeights->GetTensorInfo(), 1,
+ n_cell, "CellToInputWeights");
+ }
+ if ( m_CellToForgetWeights )
+ {
+ ValidateTensorNumDimNumElem(m_CellToForgetWeights->GetTensorInfo(), 1,
+ n_cell, "CellToForgetWeights");
+ }
+ if ( m_CellToOutputWeights )
+ {
+ ValidateTensorNumDimNumElem(m_CellToOutputWeights->GetTensorInfo(), 1,
+ n_cell, "CellToOutputWeights");
+ }
+
+ // Make sure the peephole weights are all present or all absent, and that m_PeepholeEnabled is set accordingly.
+ bool peephole_weights_all_or_none =
+ (((m_CellToInputWeights || m_Parameters.m_CifgEnabled) && m_CellToForgetWeights
+ && m_CellToOutputWeights && m_Parameters.m_PeepholeEnabled)
+ || ( !m_CellToInputWeights && !m_CellToForgetWeights
+ && !m_CellToOutputWeights && !m_Parameters.m_PeepholeEnabled));
+ if (!peephole_weights_all_or_none)
+ {
+ throw InvalidArgumentException("Invalid combination of peephole parameters");
+ }
+
+ // Make sure the input gate bias is present only when not a CIFG-LSTM.
+ if (m_Parameters.m_CifgEnabled)
+ {
+ if (m_InputGateBias)
+ {
+ throw InvalidArgumentException("InputGateBias is present and CIFG-LSTM is enabled");
+ }
+ }
+ else
+ {
+ if (!m_InputGateBias)
+ {
+ throw InvalidArgumentException("If CIFG-LSTM is disabled InputGateBias must be present.");
+ }
+ ValidateTensorNumDimNumElem(m_InputGateBias->GetTensorInfo(), 1,
+ n_cell, "InputGateBias");
+ }
+
+ ValidatePointer(m_ForgetGateBias, "Null pointer check", "ForgetGateBias");
+ ValidateTensorNumDimNumElem(m_ForgetGateBias->GetTensorInfo(), 1, n_cell, "ForgetGateBias");
+
+ ValidatePointer(m_CellBias, "Null pointer check", "CellBias");
+ ValidateTensorNumDimNumElem(m_CellBias->GetTensorInfo(), 1, n_cell, "CellBias");
+
+ ValidatePointer(m_OutputGateBias, "Null pointer check", "OutputGateBias");
+ ValidateTensorNumDimNumElem(m_OutputGateBias->GetTensorInfo(), 1, n_cell, "OutputGateBias");
+
+ if (m_ProjectionWeights)
+ {
+ ValidateTensorNumDimNumElem(m_ProjectionWeights->GetTensorInfo(), 2,
+ (n_cell * n_output), "ProjectionWeights");
+ }
+ if (m_ProjectionBias)
+ {
+ ValidateTensorNumDimNumElem(m_ProjectionBias->GetTensorInfo(), 1, n_output, "ProjectionBias");
+ }
+
+ // Making sure the projection tensors are consistent:
+ // 1) If projection weight is not present, then projection bias should not be
+ // present.
+ // 2) If projection weight is present, then projection bias is optional.
+ bool projection_tensors_consistent = ((!m_ProjectionWeights && !m_ProjectionBias &&
+ !m_Parameters.m_ProjectionEnabled)
+ || (m_ProjectionWeights && !m_ProjectionBias &&
+ m_Parameters.m_ProjectionEnabled)
+ || (m_ProjectionWeights && m_ProjectionBias &&
+ m_Parameters.m_ProjectionEnabled));
+ if (!projection_tensors_consistent)
+ {
+ throw InvalidArgumentException("Projection tensors are inconsistent.");
+ }
+
+ // The four layer normalization weights must either all be present or all be absent. If CIFG is used, the
+ // input layer normalization weights tensor is omitted and the remaining three must either all be present
+ // or all be absent. Layer normalization is used when all of the required layer normalization weights are
+ // present.
+ if (m_InputLayerNormWeights)
+ {
+ ValidateTensorNumDimNumElem(m_InputLayerNormWeights->GetTensorInfo(), 1, n_cell, "InputLayerNormWeights");
+ }
+ if (m_ForgetLayerNormWeights)
+ {
+ ValidateTensorNumDimNumElem(m_ForgetLayerNormWeights->GetTensorInfo(), 1, n_cell, "ForgetLayerNormWeights");
+ }
+ if (m_CellLayerNormWeights)
+ {
+ ValidateTensorNumDimNumElem(m_CellLayerNormWeights->GetTensorInfo(), 1, n_cell, "CellLayerNormWeights");
+ }
+ if (m_OutputLayerNormWeights)
+ {
+ ValidateTensorNumDimNumElem(m_OutputLayerNormWeights->GetTensorInfo(), 1, n_cell, "OutputLayerNormWeights");
+ }
+
+
+ if (m_Parameters.m_LayerNormEnabled)
+ {
+ if (!m_Parameters.m_CifgEnabled)
+ {
+ if (!m_InputLayerNormWeights)
+ {
+ throw InvalidArgumentException("Layer normalisation is enabled and CIFG-LSTM is disabled but "
+ "InputLayerNormWeights are not present");
+ }
+ ValidateTensorNumDimNumElem(m_InputLayerNormWeights->GetTensorInfo(),
+ 1, n_cell, "InputLayerNormWeights");
+ }
+ else if (m_InputLayerNormWeights)
+ {
+ throw InvalidArgumentException("InputLayerNormWeights are present while CIFG is enabled");
+ }
+
+ ValidatePointer(m_ForgetLayerNormWeights, "Null pointer check layer normalisation enabled",
+ "ForgetLayerNormWeights");
+ ValidateTensorNumDimNumElem(m_ForgetLayerNormWeights->GetTensorInfo(), 1, n_cell, "ForgetLayerNormWeights");
+
+ ValidatePointer(m_OutputLayerNormWeights, "Null pointer check layer normalisation enabled",
+ "OutputLayerNormWeights");
+ ValidateTensorNumDimNumElem(m_OutputLayerNormWeights->GetTensorInfo(), 1, n_cell, "OutputLayerNormWeights");
+
+ ValidatePointer(m_CellLayerNormWeights, "Null pointer check layer normalisation enabled",
+ "CellLayerNormWeights");
+ ValidateTensorNumDimNumElem(m_CellLayerNormWeights->GetTensorInfo(), 1, n_cell, "CellLayerNormWeights");
+ }
+ else if (m_InputLayerNormWeights || m_ForgetLayerNormWeights || m_OutputLayerNormWeights || m_CellLayerNormWeights)
+ {
+ throw InvalidArgumentException("Layer normalisation is disabled but one or more layer normalisation weights "
+ "are present.");
+ }
}
void ConvertFp32ToFp16QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
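The CIFG, peephole, projection, and layer normalization checks in LstmQueueDescriptor::Validate above all follow the same all-or-none idiom: a group of optional tensor pointers is valid only if it agrees with its enable flag. A self-contained sketch of that idiom, with illustrative names rather than armnn API:

    #include <stdexcept>

    // All-or-none consistency check, as used for the CIFG input gate weights:
    // either both weights are present and the feature is on, or neither is
    // present and the feature is off. Any mixed state is rejected.
    void CheckAllOrNone(const void* weightsA, const void* weightsB, bool featureEnabled)
    {
        const bool allPresent  = (weightsA != nullptr) && (weightsB != nullptr) && featureEnabled;
        const bool nonePresent = (weightsA == nullptr) && (weightsB == nullptr) && !featureEnabled;
        if (!(allPresent || nonePresent))
        {
            throw std::invalid_argument("Optional tensors and their enable flag are inconsistent");
        }
    }

Note also how the scratch buffer output is sized in the code above: n_cell * 3 per batch entry when CIFG is enabled (three gates) and n_cell * 4 otherwise.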
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index fa9e1cdf52..d241f7b24f 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -344,6 +344,10 @@ struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor>
, m_OutputGateBias(nullptr)
, m_ProjectionWeights(nullptr)
, m_ProjectionBias(nullptr)
+ , m_InputLayerNormWeights(nullptr)
+ , m_ForgetLayerNormWeights(nullptr)
+ , m_CellLayerNormWeights(nullptr)
+ , m_OutputLayerNormWeights(nullptr)
{
}
@@ -364,6 +368,10 @@ struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor>
const ConstCpuTensorHandle* m_OutputGateBias;
const ConstCpuTensorHandle* m_ProjectionWeights;
const ConstCpuTensorHandle* m_ProjectionBias;
+ const ConstCpuTensorHandle* m_InputLayerNormWeights;
+ const ConstCpuTensorHandle* m_ForgetLayerNormWeights;
+ const ConstCpuTensorHandle* m_CellLayerNormWeights;
+ const ConstCpuTensorHandle* m_OutputLayerNormWeights;
void Validate(const WorkloadInfo& workloadInfo) const;
};
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index b74b6afeb3..8ef5985fb3 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -396,6 +396,10 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
const TensorInfo* projectionBias = nullptr;
const TensorInfo* cellToForgetWeights = nullptr;
const TensorInfo* cellToOutputWeights = nullptr;
+ const TensorInfo* inputLayerNormWeights = nullptr;
+ const TensorInfo* forgetLayerNormWeights = nullptr;
+ const TensorInfo* cellLayerNormWeights = nullptr;
+ const TensorInfo* outputLayerNormWeights = nullptr;
TensorInfo optInputToInputWeights;
TensorInfo optRecurrentToInputWeights;
@@ -405,6 +409,10 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
TensorInfo optProjectionBias;
TensorInfo optCellToForgetWeights;
TensorInfo optCellToOutputWeights;
+ TensorInfo optInputLayerNormWeights;
+ TensorInfo optForgetLayerNormWeights;
+ TensorInfo optCellLayerNormWeights;
+ TensorInfo optOutputLayerNormWeights;
if(!descriptor.m_CifgEnabled)
{
@@ -449,6 +457,25 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
cellToOutputWeights = &optCellToOutputWeights;
}
+ if(descriptor.m_LayerNormEnabled)
+ {
+ optInputLayerNormWeights = OverrideDataType(
+ cLayer->m_LayerNormParameters.m_InputLayerNormWeights->GetTensorInfo(), dataType);
+ inputLayerNormWeights = &optInputLayerNormWeights;
+
+ optForgetLayerNormWeights = OverrideDataType(
+ cLayer->m_LayerNormParameters.m_ForgetLayerNormWeights->GetTensorInfo(), dataType);
+ forgetLayerNormWeights = &optForgetLayerNormWeights;
+
+ optCellLayerNormWeights = OverrideDataType(
+ cLayer->m_LayerNormParameters.m_CellLayerNormWeights->GetTensorInfo(), dataType);
+ cellLayerNormWeights = &optCellLayerNormWeights;
+
+ optOutputLayerNormWeights = OverrideDataType(
+ cLayer->m_LayerNormParameters.m_OutputLayerNormWeights->GetTensorInfo(), dataType);
+ outputLayerNormWeights = &optOutputLayerNormWeights;
+ }
+
result = layerSupportObject->IsLstmSupported(
input,
outputStateIn,
@@ -475,7 +502,11 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
projectionBias,
cellToForgetWeights,
cellToOutputWeights,
- reason);
+ reason,
+ inputLayerNormWeights,
+ forgetLayerNormWeights,
+ cellLayerNormWeights,
+ outputLayerNormWeights);
break;
}
case LayerType::Maximum:
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index ca39438fbf..56c0ab6b12 100644
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -1665,6 +1665,153 @@ LayerTestResult<int16_t, 3> CopyViaSplitterInt16Test(
return CopyViaSplitterTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
}
+void LstmUtilsZeroVectorTest()
+{
+ armnn::TensorInfo inputDesc({4}, armnn::DataType::Float32);
+ boost::multi_array<float, 1> input = MakeTensor<float, 1>(inputDesc, std::vector<float>(
+ {2., 3., 3., 4.}));
+
+ boost::multi_array<float, 1> expectedOutput = MakeTensor<float, 1>(inputDesc, std::vector<float>(
+ {0., 0., 0., 0.}));
+
+ return LstmUtilsZeroVectorTestImpl<armnn::DataType::Float32>(input, 4, expectedOutput);
+}
+
+void LstmUtilsMeanStddevNormalizationNoneZeroInputTest()
+{
+ uint32_t batchSize = 2;
+ uint32_t vecSize = 4;
+ armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
+ boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+ { 0.1f, 0.2f, 0.3f, 0.4f, //batch 0
+ 0.9f, 1.0f, 1.1f, 1.2f })); //batch 1
+
+ boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+ { -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f, //batch 0
+ -1.34163153f, -0.447210163f, 0.447211236f, 1.3416326f })); //batch 1
+
+ return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
+ vecSize, batchSize, expectedOutput);
+}
+
+void LstmUtilsMeanStddevNormalizationAllZeroInputTest()
+{
+ uint32_t batchSize = 2;
+ uint32_t vecSize = 4;
+ armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
+ boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+ { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0
+ 0.0f, 0.0f, 0.0f, 0.0f })); //batch 1
+
+ boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+ { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0
+ 0.0f, 0.0f, 0.0f, 0.0f })); //batch 1
+
+ return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
+ vecSize, batchSize, expectedOutput);
+}
+
+void LstmUtilsMeanStddevNormalizationMixedZeroInputTest()
+{
+ uint32_t batchSize = 2;
+ uint32_t vecSize = 4;
+ armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
+ boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+ { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0
+ 0.1f, 0.2f, 0.3f, 0.4f })); //batch 1
+
+ boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+ { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0
+ -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f })); //batch 1
+
+ return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
+ vecSize, batchSize, expectedOutput);
+}
+
+
+void LstmUtilsVectorBatchVectorCwiseProductTest()
+{
+ uint32_t batchSize = 4;
+ uint32_t vecSize = 29;
+ armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
+ boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
+ { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.1f,
+ 11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f,
+ 21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f, 0.0f}));
+
+ armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
+ boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+ { /* batch 0 */
+ 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.1f,
+ 11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f,
+ 21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f, 0.0f,
+ /* batch 1 */
+ -1.1f, -2.2f, -3.3f, -4.4f, -5.5f, -6.6f, -7.7f, -8.8f, -9.9f, -10.1f,
+ -11.11f, -12.12f, -13.13f, -14.14f, -15.15f, -16.16f, -17.17f, -18.18f, -19.19f, -20.2f,
+ -21.21f, -22.22f, -23.23f, -24.24f, -25.25f, -26.26f, -27.27f, -28.28f, 0.0f,
+ /* batch 2 */
+ 1.1f, -2.2f, 3.3f, -4.4f, 5.5f, -6.6f, 7.7f, -8.8f, 9.9f, -10.1f,
+ 11.11f, -12.12f, 13.13f, -14.14f, 15.15f, -16.16f, 17.17f, -18.18f, 19.19f, -20.2f,
+ 21.21f, -22.22f, 23.23f, -24.24f, 25.25f, -26.26f, 27.27f, -28.28f, 0.0f,
+ /* batch 3 */
+ -1.1f, 2.2f, -3.3f, 4.4f, -5.5f, 6.6f, -7.7f, 8.8f, -9.9f, 10.1f,
+ -11.11f, 12.12f, -13.13f, 14.14f, -15.15f, 16.16f, -17.17f, 18.18f, -19.19f, 20.2f,
+ -21.21f, 22.22f, -23.23f, 24.24f, -25.25f, 26.26f, -27.27f, 28.28f, 0.0f}));
+
+ // Expect output[b][i] = vector[i] * batchVector[b][i] (element-wise product).
+ boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+ { /* batch 0 */
+ 1.210000f, 4.840000f, 10.889999f, 19.360001f, 30.250000f, 43.559998f,
+ 59.289997f, 77.440002f, 98.009995f, 102.010010f, 123.432091f, 146.894394f,
+ 172.396896f, 199.939606f, 229.522491f, 261.145599f, 294.808899f, 330.512421f,
+ 368.256134f, 408.040039f, 449.864075f, 493.728363f, 539.632874f, 587.577576f,
+ 637.562500f, 689.587585f, 743.652954f, 799.758423f, 0.000000f,
+ /* batch 1 */
+ -1.210000f, -4.840000f, -10.889999f, -19.360001f, -30.250000f, -43.559998f,
+ -59.289997f, -77.440002f, -98.009995f, -102.010010f, -123.432091f, -146.894394f,
+ -172.396896f, -199.939606f, -229.522491f, -261.145599f, -294.808899f, -330.512421f,
+ -368.256134f, -408.040039f, -449.864075f, -493.728363f, -539.632874f, -587.577576f,
+ -637.562500f, -689.587585f, -743.652954f, -799.758423f, 0.000000f,
+ /* batch 2 */
+ 1.210000f, -4.840000f, 10.889999f, -19.360001f, 30.250000f, -43.559998f,
+ 59.289997f, -77.440002f, 98.009995f, -102.010010f, 123.432091f, -146.894394f,
+ 172.396896f, -199.939606f, 229.522491f, -261.145599f, 294.808899f, -330.512421f,
+ 368.256134f, -408.040039f, 449.864075f, -493.728363f, 539.632874f, -587.577576f,
+ 637.562500f, -689.587585f, 743.652954f, -799.758423f, 0.000000f,
+ /* batch 3 */
+ -1.210000f, 4.840000f, -10.889999f, 19.360001f, -30.250000f, 43.559998f,
+ -59.289997f, 77.440002f, -98.009995f, 102.010010f, -123.432091f, 146.894394f,
+ -172.396896f, 199.939606f, -229.522491f, 261.145599f, -294.808899f, 330.512421f,
+ -368.256134f, 408.040039f, -449.864075f, 493.728363f, -539.632874f, 587.577576f,
+ -637.562500f, 689.587585f, -743.652954f, 799.758423f, 0.000000f}));
+
+ return LstmUtilsVectorBatchVectorCwiseProductTestImpl<armnn::DataType::Float32>(vector, batchVector,
+ vecSize, batchSize, expectedOutput);
+}
+
+
+void LstmUtilsVectorBatchVectorAddTest()
+{
+ uint32_t batchSize = 2;
+ uint32_t vecSize = 3;
+ armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
+ boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
+ { 0.0f, -0.5f, 1.0f}));
+
+ armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
+ boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+ { 1.0f, 2.0f, 3.0f, //batch 0
+ 4.0f, 5.0f, 6.0f})); //batch 1
+
+ boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+ { 1.0f, 1.5f, 4.0f,
+ 4.0f, 4.5f, 7.0f}));
+
+ return LstmUtilsVectorBatchVectorAddTestImpl<armnn::DataType::Float32>(vector, batchVector,
+ vecSize, batchSize, expectedOutput);
+}
+
+
LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
@@ -1721,6 +1868,25 @@ LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
workloadFactory, memoryManager, input, expectedOutput);
}
+
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
+ boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+ {0.7f, 0.8f, 0.1f, 0.2f, 0.3f, //batch 0
+ 0.3f, 0.2f, 0.9f, 0.8f, 0.1f})); //batch 1
+
+ armnn::TensorInfo outputDesc({ 2, 3 }, armnn::DataType::Float32);
+ boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
+ { 0.0244077f, 0.128027f, -0.00170918f, //batch 0
+ -0.00692428f, 0.0848741f, 0.063445f})); //batch 1
+ return LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<armnn::DataType::Float32>(
+ workloadFactory, memoryManager, input, expectedOutput);
+}
+
+
LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
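The expected values in the LstmUtilsMeanStddevNormalization* tests above come from normalizing each batch row to zero mean and unit variance. A standalone sketch of one formulation that reproduces them (illustrative, not the armnn implementation; the epsilon guard matches the 1e-8f passed in the test impl):

    #include <cmath>
    #include <cstddef>

    // Normalize one row in place: x -> (x - mean) / sqrt(variance + epsilon).
    // All-zero rows map to all-zero output, as the AllZeroInput test expects.
    void MeanStddevNormalizeRow(float* row, std::size_t n, float epsilon = 1e-8f)
    {
        float mean = 0.0f;
        for (std::size_t i = 0; i < n; ++i) { mean += row[i]; }
        mean /= static_cast<float>(n);

        float variance = 0.0f;
        for (std::size_t i = 0; i < n; ++i)
        {
            const float d = row[i] - mean;
            variance += d * d;
        }
        variance /= static_cast<float>(n);

        const float invStddev = 1.0f / std::sqrt(variance + epsilon);
        for (std::size_t i = 0; i < n; ++i)
        {
            row[i] = (row[i] - mean) * invStddev;
        }
    }

For batch 0 of the NoneZeroInput test (0.1, 0.2, 0.3, 0.4): mean = 0.25 and variance = 0.0125, so 0.1 maps to (0.1 - 0.25) / sqrt(0.0125) ≈ -1.3416, matching the expected -1.34164071f.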
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 405ccff35b..66324e104c 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1458,6 +1458,13 @@ LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+void LstmUtilsZeroVectorTest();
+void LstmUtilsMeanStddevNormalizationNoneZeroInputTest();
+void LstmUtilsMeanStddevNormalizationAllZeroInputTest();
+void LstmUtilsMeanStddevNormalizationMixedZeroInputTest();
+void LstmUtilsVectorBatchVectorCwiseProductTest();
+void LstmUtilsVectorBatchVectorAddTest();
+
LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -1470,6 +1477,10 @@ LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/LstmTestImpl.hpp b/src/backends/backendsCommon/test/LstmTestImpl.hpp
index dae9c8a3f1..2ed0a974fc 100644
--- a/src/backends/backendsCommon/test/LstmTestImpl.hpp
+++ b/src/backends/backendsCommon/test/LstmTestImpl.hpp
@@ -16,6 +16,119 @@
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
+#include "reference/workloads/LstmUtils.hpp"
+
+//LstmUtils Tests
+// TODO: Add tests for the remaining functions in LstmUtils.hpp
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void LstmUtilsVectorBatchVectorAddTestImpl(
+ boost::multi_array<float, 1>& vec,
+ boost::multi_array<float, 2>& batchVec,
+ uint32_t vSize,
+ uint32_t nBatch,
+ boost::multi_array<float, 2>& expectedOutput )
+{
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+ armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset );
+
+ // Make encoder and decoder
+ std::unique_ptr<armnn::Decoder<float>> vecDecoder = armnn::MakeDecoder<float>(tensorInfo, vec.data());
+ std::unique_ptr<armnn::Decoder<float>> batchVecDecoder = armnn::MakeDecoder<float>(tensorInfo, batchVec.data());
+ std::unique_ptr<armnn::Encoder<float>> batchVecEncoder = armnn::MakeEncoder<float>(tensorInfo, batchVec.data());
+
+ VectorBatchVectorAdd(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder);
+
+ // check shape and compare values
+ BOOST_TEST(CompareTensors(batchVec, expectedOutput));
+
+ // check if iterator is back at start position
+ batchVecEncoder->Set(1.0f);
+ BOOST_TEST(batchVec[0][0] == 1.0f);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void LstmUtilsZeroVectorTestImpl(
+ boost::multi_array<float, 1>& input,
+ uint32_t vSize,
+ boost::multi_array<float, 1>& expectedOutput)
+{
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ armnn::TensorInfo tensorInfo({vSize}, ArmnnType, qScale, qOffset );
+
+ // Make encoder for input
+ std::unique_ptr<armnn::Encoder<float>> outputEncoder = armnn::MakeEncoder<float>(tensorInfo, input.data());
+
+ // call ZeroVector
+ ZeroVector(*outputEncoder, vSize);
+
+ // check shape and compare values
+ BOOST_TEST(CompareTensors(input, expectedOutput));
+
+ // check if iterator is back at start position
+ outputEncoder->Set(1.0f);
+ BOOST_TEST(input[0] == 1.0f);
+
+}
+
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void LstmUtilsMeanStddevNormalizationTestImpl(
+ boost::multi_array<float, 2>& input,
+ uint32_t vSize,
+ uint32_t nBatch,
+ boost::multi_array<float, 2>& expectedOutput)
+{
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+ armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset );
+
+ // Make encoder and decoder for input
+ std::unique_ptr<armnn::Decoder<float>> inputDecoder = armnn::MakeDecoder<float>(tensorInfo, input.data());
+ std::unique_ptr<armnn::Encoder<float>> outputEncoder = armnn::MakeEncoder<float>(tensorInfo, input.data());
+
+ MeanStddevNormalization(*inputDecoder, *outputEncoder, vSize, nBatch, 1e-8f);
+
+ // check shape and compare values
+ BOOST_TEST(CompareTensors(input, expectedOutput));
+
+ // check if iterator is back at start position
+ outputEncoder->Set(1.0f);
+ BOOST_TEST(input[0][0] == 1.0f);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void LstmUtilsVectorBatchVectorCwiseProductTestImpl(
+ boost::multi_array<float, 1>& vec,
+ boost::multi_array<float, 2>& batchVec,
+ uint32_t vSize,
+ uint32_t nBatch,
+ boost::multi_array<float, 2>& expectedOutput)
+{
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+ armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset );
+
+ // Make encoder and decoder
+ std::unique_ptr<armnn::Decoder<float>> vecDecoder = armnn::MakeDecoder<float>(tensorInfo, vec.data());
+ std::unique_ptr<armnn::Decoder<float>> batchVecDecoder = armnn::MakeDecoder<float>(tensorInfo, batchVec.data());
+ std::unique_ptr<armnn::Encoder<float>> batchVecEncoder = armnn::MakeEncoder<float>(tensorInfo, batchVec.data());
+
+ VectorBatchVectorCwiseProduct(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder);
+
+ // check shape and compare values
+ BOOST_TEST(CompareTensors(batchVec, expectedOutput));
+
+ // check if iterator is back at start position
+ batchVecEncoder->Set(1.0f);
+ BOOST_TEST(batchVec[0][0] == 1.0f);
+}
+
+// Lstm Layer tests:
+
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 2>
LstmNoCifgNoPeepholeNoProjectionTestImpl(
@@ -187,7 +300,6 @@ LstmNoCifgNoPeepholeNoProjectionTestImpl(
data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
- data.m_CellToInputWeights = &cellToInputWeightsTensor;
data.m_InputGateBias = &inputGateBiasTensor;
data.m_ForgetGateBias = &forgetGateBiasTensor;
data.m_CellBias = &cellBiasTensor;
@@ -1157,3 +1269,275 @@ LayerTestResult<T, 2> LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(
return ret3;
}
+
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2>
+LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const boost::multi_array<T, 2>& input,
+ const boost::multi_array<T, 2>& outputExpected,
+ float qScale = 0.0f,
+ int32_t qOffset = 0,
+ armnn::DataType constantDataType = armnn::DataType::Float32)
+{
+ unsigned int batchSize = 2;
+ unsigned int outputSize = 3;
+ unsigned int inputSize = 5;
+ unsigned numUnits = 4;
+
+ armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, ArmnnType, qScale, qOffset);
+
+ // Scratch buffer size without CIFG [batchSize, numUnits * 4]
+ armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
+
+ LayerTestResult<T, 2> ret(outputTensorInfo);
+
+ std::vector<float> inputVector;
+ inputVector.assign(input.data(), input.data() + (batchSize * inputSize));
+ auto inputTensor = MakeTensor<float,2>(inputTensorInfo, inputVector);
+
+ std::vector<float> cellStateInVector(batchSize * numUnits, 0.f);
+ auto cellStateInTensor = MakeTensor<float,2>(cellStateInTensorInfo, cellStateInVector);
+
+ std::vector<float> outputStateInVector(batchSize * outputSize, 0.f);
+ auto outputStateInTensor = MakeTensor<float,2>(outputStateInTensorInfo, outputStateInVector);
+
+ std::vector<float> scratchBufferVector(batchSize * numUnits * 4, 0.f);
+ auto scratchBufferTensor = MakeTensor<float,2>(scratchBufferTensorInfo, scratchBufferVector);
+
+ std::vector<float> outputStateOutVector(batchSize * outputSize, 0.f);
+ auto outputStateOutTensor = MakeTensor<float,2>(outputStateOutTensorInfo, outputStateOutVector);
+
+ std::vector<float> cellStateOutVector(batchSize * numUnits, 0.f);
+ auto cellStateOutTensor = MakeTensor<float,2>(cellStateOutTensorInfo, cellStateOutVector);
+
+ std::vector<float> outputVector;
+ outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize));
+ ret.outputExpected = MakeTensor<float, 2>(outputTensorInfo, outputVector);
+
+ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
+ workloadFactory.CreateTensorHandle(cellStateInTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
+ workloadFactory.CreateTensorHandle(outputStateInTensorInfo);
+
+ std::unique_ptr<armnn::ITensorHandle> scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
+ workloadFactory.CreateTensorHandle(outputStateOutTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
+ workloadFactory.CreateTensorHandle(cellStateOutTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ armnn::LstmQueueDescriptor data;
+ armnn::WorkloadInfo info;
+
+ AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+ AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get());
+ AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get());
+
+ AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get());
+ AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get());
+ AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get());
+ AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+ armnn::TensorInfo tensorInfo3({outputSize}, constantDataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo4({numUnits}, constantDataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo4x5({numUnits, inputSize}, constantDataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo4x3({numUnits, outputSize}, constantDataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo3x4({outputSize, numUnits}, constantDataType, qScale, qOffset);
+
+ auto inputToInputWeights =
+ MakeTensor<float, 2>(tensorInfo4x5, { 0.5f, 0.6f, 0.7f, -0.8f, -0.9f,
+ 0.1f, 0.2f, 0.3f, -0.4f, 0.5f,
+ -0.8f, 0.7f, -0.6f, 0.5f, -0.4f,
+ -0.5f, -0.4f, -0.3f, -0.2f, -0.1f}); //{numUnits, inputSize}
+
+ auto inputToForgetWeights =
+ MakeTensor<float, 2>(tensorInfo4x5, {-0.6f, -0.1f, 0.3f, 0.2f, 0.9f,
+ -0.5f, -0.2f, -0.4f, 0.3f, -0.8f,
+ -0.4f, 0.3f, -0.5f, -0.4f, -0.6f,
+ 0.3f, -0.4f, -0.6f, -0.5f, -0.5f}); //{numUnits, inputSize}
+
+ auto inputToCellWeights =
+ MakeTensor<float, 2>(tensorInfo4x5, {-0.4f, -0.3f, -0.2f, -0.1f, -0.5f,
+ 0.5f, -0.2f, -0.3f, -0.2f, -0.6f,
+ 0.6f, -0.1f, -0.4f, -0.3f, -0.7f,
+ 0.7f, -0.9f, -0.5f, 0.8f, 0.6f}); //{numUnits, inputSize}
+
+ auto inputToOutputWeights =
+ MakeTensor<float, 2>(tensorInfo4x5, {-0.8f, -0.4f, -0.2f, -0.9f, -0.1f,
+ -0.7f, 0.3f, -0.3f, -0.8f, -0.2f,
+ 0.6f, -0.2f, 0.4f, -0.7f, -0.3f,
+ -0.5f, 0.1f, 0.5f, -0.6f, -0.4f}); //{numUnits, inputSize}
+
+ auto inputGateBias =
+ MakeTensor<float, 1>(tensorInfo4, {0.03f, 0.15f, 0.22f, 0.38f}); //{numUnits}
+
+ auto forgetGateBias =
+ MakeTensor<float, 1>(tensorInfo4, {0.1f, -0.3f, -0.2f, 0.1f}); //{numUnits}
+
+ auto cellBias =
+ MakeTensor<float, 1>(tensorInfo4, {-0.05f, 0.72f, 0.25f, 0.08f}); //{numUnits}
+
+ auto outputGateBias =
+ MakeTensor<float, 1>(tensorInfo4, {0.05f, -0.01f, 0.2f, 0.1f}); //{numUnits}
+
+ auto recurrentToInputWeights =
+ MakeTensor<float, 2>(tensorInfo4x3, {-0.2f, -0.3f, 0.4f,
+ 0.1f, -0.5f, 0.9f,
+ -0.2f, -0.3f, -0.7f,
+ 0.05f, -0.2f, -0.6f}); //{numUnits, outputSize}
+
+ auto recurrentToCellWeights =
+ MakeTensor<float, 2>(tensorInfo4x3, {-0.3f, 0.2f, 0.1f,
+ -0.3f, 0.8f, -0.08f,
+ -0.2f, 0.3f, 0.8f,
+ -0.6f, -0.1f, 0.2f}); //{numUnits, outputSize}
+
+ auto recurrentToForgetWeights =
+ MakeTensor<float, 2>(tensorInfo4x3, {-0.5f, -0.3f, -0.5f,
+ -0.2f, 0.6f, 0.4f,
+ 0.9f, 0.3f, -0.1f,
+ 0.2f, 0.5f, 0.2f}); //{numUnits, outputSize}
+
+ auto recurrentToOutputWeights =
+ MakeTensor<float, 2>(tensorInfo4x3, { 0.3f, -0.1f, 0.1f,
+ -0.2f, -0.5f, -0.7f,
+ -0.2f, -0.6f, -0.1f,
+ -0.4f, -0.7f, -0.2f}); //{numUnits, outputSize}
+
+ auto cellToInputWeights =
+ MakeTensor<float, 1>(tensorInfo4, {0.05f, 0.1f, 0.25f, 0.15f}); //{numUnits}
+
+ auto cellToForgetWeights =
+ MakeTensor<float, 1>(tensorInfo4, {-0.02f, -0.15f, -0.25f, -0.03f}); //{numUnits}
+
+ auto cellToOutputWeights =
+ MakeTensor<float, 1>(tensorInfo4, {0.1f, -0.1f, -0.5f, 0.05f}); //{numUnits}
+
+ auto projectionWeights =
+ MakeTensor<float, 2>(tensorInfo3x4,
+ {-0.1f, 0.2f, 0.01f, -0.2f,
+ 0.1f, 0.5f, 0.3f, 0.08f,
+ 0.07f, 0.2f, -0.4f, 0.2f}); //{outputSize, numUnits}
+
+ std::vector<float> projectionBiasVector(outputSize, 0.f);
+ auto projectionBias = MakeTensor<float,1>(tensorInfo3, projectionBiasVector); //{outputSize}
+
+ auto inputLayerNormWeights =
+ MakeTensor<float, 1>(tensorInfo4, {0.1f, 0.2f, 0.3f, 0.5f}); //{numUnits}
+
+ auto forgetLayerNormWeights =
+ MakeTensor<float, 1>(tensorInfo4, {0.2f, 0.2f, 0.4f, 0.3f}); //{numUnits}
+
+ auto cellLayerNormWeights =
+ MakeTensor<float, 1>(tensorInfo4, {0.7f, 0.2f, 0.3f, 0.8f}); //{numUnits}
+
+ auto outputLayerNormWeights =
+ MakeTensor<float, 1>(tensorInfo4, {0.6f, 0.2f, 0.2f, 0.5f}); //{numUnits}
+
+
+ armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo3x4);
+ armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo3);
+
+ armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(tensorInfo4);
+
+ AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
+ AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
+ AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
+ AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
+ AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);
+ AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]);
+
+ AllocateAndCopyDataToITensorHandle(&inputLayerNormWeightsTensor, &inputLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
+ AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);
+
+ data.m_InputToInputWeights = &inputToInputWeightsTensor;
+ data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+ data.m_InputToCellWeights = &inputToCellWeightsTensor;
+ data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+ data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
+ data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+ data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+ data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+ data.m_CellToInputWeights = &cellToInputWeightsTensor;
+ data.m_InputGateBias = &inputGateBiasTensor;
+ data.m_ForgetGateBias = &forgetGateBiasTensor;
+ data.m_CellBias = &cellBiasTensor;
+ data.m_OutputGateBias = &outputGateBiasTensor;
+ data.m_CellToForgetWeights = &cellToForgetWeightsTensor;
+ data.m_CellToOutputWeights = &cellToOutputWeightsTensor;
+ data.m_ProjectionWeights = &projectionWeightsTensor;
+ data.m_ProjectionBias = &projectionBiasTensor;
+
+ data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor;
+ data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
+ data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
+ data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;
+
+ // Flags to set test configuration
+ data.m_Parameters.m_ActivationFunc = 4;
+ data.m_Parameters.m_CifgEnabled = false;
+ data.m_Parameters.m_PeepholeEnabled = true;
+ data.m_Parameters.m_ProjectionEnabled = true;
+ data.m_Parameters.m_LayerNormEnabled = true;
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info);
+ inputHandle->Allocate();
+ outputStateInHandle->Allocate();
+ cellStateInHandle->Allocate();
+
+ scratchHandle->Allocate();
+ outputStateOutHandle->Allocate();
+ cellStateOutHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+ CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
+ CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
+
+ workload->Execute();
+
+ CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+
+ return ret;
+}
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp
index 7c7af2ddce..c6960986b3 100644
--- a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp
+++ b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp
@@ -453,22 +453,139 @@ BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements)
BOOST_AUTO_TEST_CASE(LstmQueueDescriptor_Validate)
{
- armnn::TensorInfo inputTensorInfo;
- armnn::TensorInfo outputTensorInfo;
-
- unsigned int inputShape[] = { 1, 2 };
- unsigned int outputShape[] = { 1 };
-
- inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(1, outputShape, armnn::DataType::Float32);
-
- LstmQueueDescriptor invalidData;
- WorkloadInfo invalidInfo;
-
- AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
- AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
-
- BOOST_CHECK_THROW(invalidData.Validate(invalidInfo), armnn::InvalidArgumentException);
+ armnn::DataType dataType = armnn::DataType::Float32;
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ unsigned int batchSize = 2;
+ unsigned int outputSize = 3;
+ unsigned int inputSize = 5;
+ unsigned int numUnits = 4;
+
+ armnn::TensorInfo inputTensorInfo({batchSize, inputSize}, dataType, qScale, qOffset);
+ armnn::TensorInfo outputStateInTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset);
+ armnn::TensorInfo cellStateInTensorInfo({batchSize, numUnits}, dataType, qScale, qOffset);
+
+ // Scratch buffer size without CIFG: [batchSize, numUnits * 4]
+ armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, dataType, qScale, qOffset);
+ armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, dataType, qScale, qOffset);
+ armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset);
+ armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset);
+
+ armnn::TensorInfo tensorInfo3({outputSize}, dataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo4({numUnits}, dataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo4x5({numUnits, inputSize}, dataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo4x3({numUnits, outputSize}, dataType, qScale, qOffset);
+ armnn::TensorInfo tensorInfo3x4({outputSize, numUnits}, dataType, qScale, qOffset);
+
+ LstmQueueDescriptor data;
+ WorkloadInfo info;
+
+ AddInputToWorkload(data, info, inputTensorInfo, nullptr);
+ AddInputToWorkload(data, info, outputStateInTensorInfo, nullptr);
+ AddInputToWorkload(data, info, cellStateInTensorInfo, nullptr);
+
+ AddOutputToWorkload(data, info, scratchBufferTensorInfo, nullptr);
+ AddOutputToWorkload(data, info, outputStateOutTensorInfo, nullptr);
+ AddOutputToWorkload(data, info, cellStateOutTensorInfo, nullptr);
+ // AddOutputToWorkload(data, info, outputTensorInfo, nullptr) is deliberately left out
+ // so that the wrong-number-of-outputs check below can fire first.
+
+ armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo4x5);
+ armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo4x3);
+ armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo3x4);
+ armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo3);
+ armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(tensorInfo4);
+ armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(tensorInfo4);
+
+ data.m_InputToInputWeights = &inputToInputWeightsTensor;
+ data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+ data.m_InputToCellWeights = &inputToCellWeightsTensor;
+ data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+ data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
+ data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+ data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+ data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+ data.m_CellToInputWeights = &cellToInputWeightsTensor;
+ data.m_InputGateBias = &inputGateBiasTensor;
+ data.m_ForgetGateBias = &forgetGateBiasTensor;
+ data.m_CellBias = &cellBiasTensor;
+ data.m_OutputGateBias = &outputGateBiasTensor;
+ data.m_CellToForgetWeights = &cellToForgetWeightsTensor;
+ data.m_CellToOutputWeights = &cellToOutputWeightsTensor;
+ data.m_ProjectionWeights = &projectionWeightsTensor;
+ data.m_ProjectionBias = &projectionBiasTensor;
+
+ data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor;
+ data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
+ data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
+ data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;
+
+ // Flags selecting the test configuration
+ data.m_Parameters.m_ActivationFunc = 4; // 4 maps to TanH (see SetActivationParameters)
+ data.m_Parameters.m_CifgEnabled = false;
+ data.m_Parameters.m_PeepholeEnabled = true;
+ data.m_Parameters.m_ProjectionEnabled = true;
+ data.m_Parameters.m_LayerNormEnabled = true;
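+
+ // With m_LayerNormEnabled set, Validate() requires the forget, cell and output layer
+ // norm weights, plus the input layer norm weights whenever CIFG is disabled.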
+
+ // check wrong number of outputs
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ AddOutputToWorkload(data, info, outputTensorInfo, nullptr);
+
+ // check invalid CIFG configuration: CIFG enabled while the input gate parameters are still set
+ data.m_Parameters.m_CifgEnabled = true;
+ armnn::TensorInfo scratchBufferTensorInfo2({batchSize, numUnits * 3}, dataType, qScale, qOffset);
+ SetWorkloadOutput(data, info, 0, scratchBufferTensorInfo2, nullptr);
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ data.m_Parameters.m_CifgEnabled = false;
+ SetWorkloadOutput(data, info, 0, scratchBufferTensorInfo, nullptr);
+
+ // check missing inputGateBias (required when CIFG is disabled)
+ data.m_InputGateBias = nullptr;
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ data.m_InputGateBias = &inputGateBiasTensor;
+
+ // check inconsistent projection parameters
+ data.m_Parameters.m_ProjectionEnabled = false;
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ data.m_Parameters.m_ProjectionEnabled = true;
+ data.m_ProjectionWeights = nullptr;
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ data.m_ProjectionWeights = &projectionWeightsTensor;
+
+ // check missing input layer normalization weights
+ data.m_InputLayerNormWeights = nullptr;
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor;
+
+ // layer norm disabled but normalization weights are present
+ data.m_Parameters.m_LayerNormEnabled = false;
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ data.m_Parameters.m_LayerNormEnabled = true;
+
+ // check invalid outputTensor shape
+ armnn::TensorInfo incorrectOutputTensorInfo({batchSize, outputSize + 1}, dataType, qScale, qOffset);
+ SetWorkloadOutput(data, info, 3, incorrectOutputTensorInfo, nullptr);
+ BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException);
+ SetWorkloadOutput(data, info, 3, outputTensorInfo, nullptr);
+
+ // check correct configuration
+ BOOST_CHECK_NO_THROW(data.Validate(info));
}
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 12c2efebb8..ec134a16e8 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -420,7 +420,11 @@ bool ClLayerSupport::IsLstmSupported(const TensorInfo& input,
const TensorInfo* projectionBias,
const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights,
- Optional<std::string&> reasonIfUnsupported) const
+ Optional<std::string&> reasonIfUnsupported,
+ const TensorInfo* inputLayerNormWeights,
+ const TensorInfo* forgetLayerNormWeights,
+ const TensorInfo* cellLayerNormWeights,
+ const TensorInfo* outputLayerNormWeights) const
{
FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate,
reasonIfUnsupported,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index f2f25af87d..4d0f5bdfbb 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -131,7 +131,11 @@ public:
const TensorInfo* projectionBias,
const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional(),
+ const TensorInfo* inputLayerNormWeights = nullptr,
+ const TensorInfo* forgetLayerNormWeights = nullptr,
+ const TensorInfo* cellLayerNormWeights = nullptr,
+ const TensorInfo* outputLayerNormWeights = nullptr) const override;
bool IsMaximumSupported(const TensorInfo& input0,
const TensorInfo& input1,
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index b563badca5..3d260c5abd 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -861,7 +861,11 @@ bool RefLayerSupport::IsLstmSupported(const TensorInfo& input,
const TensorInfo* projectionBias,
const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights,
- Optional<std::string&> reasonIfUnsupported) const
+ Optional<std::string&> reasonIfUnsupported,
+ const TensorInfo* inputLayerNormWeights,
+ const TensorInfo* forgetLayerNormWeights,
+ const TensorInfo* cellLayerNormWeights,
+ const TensorInfo* outputLayerNormWeights) const
{
ignore_unused(descriptor);
ignore_unused(inputToForgetWeights);
@@ -881,6 +885,10 @@ bool RefLayerSupport::IsLstmSupported(const TensorInfo& input,
ignore_unused(projectionBias);
ignore_unused(cellToForgetWeights);
ignore_unused(cellToOutputWeights);
+ ignore_unused(inputLayerNormWeights);
+ ignore_unused(forgetLayerNormWeights);
+ ignore_unused(cellLayerNormWeights);
+ ignore_unused(outputLayerNormWeights);
bool supported = true;
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 22b007b378..ead4d1ce4a 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -155,7 +155,11 @@ public:
const TensorInfo* projectionBias,
const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional(),
+ const TensorInfo* inputLayerNormWeights = nullptr,
+ const TensorInfo* forgetLayerNormWeights = nullptr,
+ const TensorInfo* cellLayerNormWeights = nullptr,
+ const TensorInfo* outputLayerNormWeights = nullptr) const override;
bool IsMaximumSupported(const TensorInfo& input0,
const TensorInfo& input1,
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 12e57740e7..a736a889a5 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -22,6 +22,7 @@ BACKEND_SOURCES := \
workloads/ElementwiseFunction.cpp \
workloads/FullyConnected.cpp \
workloads/Gather.cpp \
+ workloads/LstmUtils.cpp \
workloads/Mean.cpp \
workloads/Concatenate.cpp \
workloads/Pad.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 7797f17a22..9f89c8c2e2 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -827,6 +827,17 @@ ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
// Lstm
+BOOST_AUTO_TEST_CASE(LstmUtilsZeroVector)
+{
+ LstmUtilsZeroVectorTest();
+}
+BOOST_AUTO_TEST_CASE(LstmUtilsMeanStddevNormalization)
+{
+ LstmUtilsMeanStddevNormalizationNoneZeroInputTest();
+ LstmUtilsMeanStddevNormalizationAllZeroInputTest();
+ LstmUtilsMeanStddevNormalizationMixedZeroInputTest();
+}
+BOOST_AUTO_TEST_CASE(LstmUtilsVectorBatchVectorCwiseProduct)
+{
+ LstmUtilsVectorBatchVectorCwiseProductTest();
+}
+BOOST_AUTO_TEST_CASE(LstmUtilsVectorBatchVectorAdd)
+{
+ LstmUtilsVectorBatchVectorAddTest();
+}
+
ARMNN_AUTO_TEST_CASE(LstmLayerFloat32WithCifgWithPeepholeNoProjection,
LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest)
ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection,
@@ -834,6 +845,9 @@ ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection,
ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjection,
LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest)
+ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNorm,
+ LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest)
+
ARMNN_AUTO_TEST_CASE(LstmLayerInt16NoCifgNoPeepholeNoProjection,
LstmLayerInt16NoCifgNoPeepholeNoProjectionTest)
ARMNN_AUTO_TEST_CASE(LstmLayerInt16WithCifgWithPeepholeNoProjection,
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 3c0af01c00..696605d662 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -28,6 +28,7 @@ list(APPEND armnnRefBackendWorkloads_sources
Gather.cpp
Gather.hpp
LstmUtils.hpp
+ LstmUtils.cpp
Maximum.hpp
Mean.cpp
Mean.hpp
diff --git a/src/backends/reference/workloads/LstmUtils.cpp b/src/backends/reference/workloads/LstmUtils.cpp
new file mode 100644
index 0000000000..f197aae291
--- /dev/null
+++ b/src/backends/reference/workloads/LstmUtils.cpp
@@ -0,0 +1,307 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "LstmUtils.hpp"
+#include "BaseIterator.hpp"
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <cmath>
+#include <string>
+
+// Helper functions ported from the Android code base
+// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+
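+// Adds a vector of length vSize to each of the nBatch rows of batchVector,
+// writing the sums to outResult and rewinding all iterators afterwards.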
+void VectorBatchVectorAdd(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ armnn::Decoder<float>& batchVector,
+ uint32_t nBatch,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t b = 0; b < nBatch; b++)
+ {
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outResult.Set(batchVector.Get() + vector.Get());
+ ++outResult;
+ ++vector;
+ ++batchVector;
+ }
+ vector -= vSize;
+ }
+ batchVector -= vSize * nBatch;
+ outResult -= vSize * nBatch;
+}
+
+
+// Layer norm for each batch.
+// normalization_epsilon is used in place of a zero variance to avoid division by zero.
+void MeanStddevNormalization(armnn::Decoder<float>& input_vector,
+ armnn::Encoder<float>& output_vector,
+ uint32_t v_size,
+ uint32_t n_batch,
+ float normalization_epsilon)
+{
+ for (uint32_t batch = 0; batch < n_batch; ++batch)
+ {
+ float sum = 0.0f;
+ float sum_sq = 0.0f;
+ for (uint32_t i = 0; i < v_size; ++i)
+ {
+ sum += input_vector.Get();
+ sum_sq += input_vector.Get() * input_vector.Get();
+ ++input_vector;
+ }
+ input_vector -= v_size;
+
+ const float mean = sum / static_cast<float>(v_size);
+ const float variance = sum_sq / static_cast<float>(v_size) - mean * mean;
+ // Fall back to normalization_epsilon when the variance is zero, keeping stddev_inv finite.
+ const float stddev_inv = (variance == 0.0f)
+ ? 1.0f / std::sqrt(normalization_epsilon)
+ : 1.0f / std::sqrt(variance);
+
+ for (uint32_t i = 0; i < v_size; ++i)
+ {
+ output_vector.Set((input_vector.Get() - mean) * stddev_inv);
+ ++output_vector;
+ ++input_vector;
+ }
+ // Don't rewind input_vector here; the next batch continues from this position.
+ }
+ output_vector -= v_size * n_batch;
+ input_vector -= v_size * n_batch;
+}
+
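+// Writes 0.0f to the first vSize elements of the encoder, then rewinds it.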
+void ZeroVector(armnn::Encoder<float>& vector,
+ uint32_t vSize)
+{
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ vector.Set(0.0f);
+ ++vector;
+ }
+ vector -= vSize;
+}
+
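+// Accumulates the product of an (mRows x mCols) matrix with each batch's input vector
+// into the corresponding mRows entries of outResult.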
+void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix,
+ uint32_t mRows,
+ uint32_t mCols,
+ armnn::Decoder<float>& vector,
+ uint32_t nBatch,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t b = 0; b < nBatch; b++)
+ {
+ for (uint32_t r = 0; r < mRows; r++)
+ {
+ vector += b * mCols;
+ for (uint32_t c = 0; c < mCols; c++)
+ {
+ outResult.Set(outResult.Get() + matrix.Get() * vector.Get());
+ ++matrix;
+ ++vector;
+ }
+ outResult += 1;
+ vector -= (b+1) * mCols;
+ }
+ matrix -= (mRows * mCols);
+ }
+ outResult -= (mRows * nBatch);
+}
+
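+// Copies a vector of length vSize into each of the nBatch rows of outBatchVector.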
+void VectorBatchVectorAssign(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ uint32_t nBatch,
+ armnn::Encoder<float>& outBatchVector)
+{
+ for (uint32_t b = 0; b < nBatch; b++)
+ {
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outBatchVector.Set(vector.Get());
+ ++outBatchVector;
+ ++vector;
+ }
+ vector -= vSize;
+ }
+ outBatchVector -= (nBatch * vSize);
+}
+
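+// Multiplies a vector element-wise with each batch row and accumulates into outResult.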
+void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ armnn::Decoder<float>& batchVector,
+ uint32_t nBatch,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t b = 0; b < nBatch; b++)
+ {
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outResult.Set(outResult.Get() + vector.Get() * batchVector.Get());
+ ++outResult;
+ ++vector;
+ ++batchVector;
+ }
+ vector -= vSize;
+ }
+ batchVector -= vSize * nBatch;
+ outResult -= vSize * nBatch;
+}
+
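+// Multiplies a vector element-wise with each batch row, overwriting outResult.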
+void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ armnn::Decoder<float>& batchVector,
+ uint32_t nBatch,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t b = 0; b < nBatch; b++)
+ {
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outResult.Set(vector.Get() * batchVector.Get());
+ ++outResult;
+ ++vector;
+ ++batchVector;
+ }
+ vector -= vSize;
+ }
+ batchVector -= vSize * nBatch;
+ outResult -= vSize * nBatch;
+}
+
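+// Computes 1 - x for each element; used to couple the input and forget gates when CIFG is enabled.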
+void Sub1Vector(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ armnn::Encoder<float>& result)
+{
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ result.Set(1.0f - vector.Get());
+ ++vector;
+ ++result;
+ }
+ vector -= vSize;
+ result -= vSize;
+}
+
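+// Element-wise product of two vectors of length vSize, overwriting outResult.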
+void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1,
+ armnn::Decoder<float>& vector2,
+ uint32_t vSize,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outResult.Set(vector1.Get() * vector2.Get());
+ ++outResult;
+ ++vector1;
+ ++vector2;
+ }
+ outResult -= vSize;
+ vector1 -= vSize;
+ vector2 -= vSize;
+}
+
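+// Element-wise product of two vectors of length vSize, accumulated into outResult.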
+void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1,
+ armnn::Decoder<float>& vector2,
+ uint32_t vSize,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outResult.Set(outResult.Get() + vector1.Get() * vector2.Get());
+ ++outResult;
+ ++vector1;
+ ++vector2;
+ }
+ outResult -= vSize;
+ vector1 -= vSize;
+ vector2 -= vSize;
+}
+
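+// Clamps f to the closed interval [-absLimit, absLimit].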
+float Clip(float f,
+ float absLimit)
+{
+ float result = (absLimit < f) ? absLimit : f;
+ result = (-absLimit > result) ? -absLimit : result;
+ return result;
+}
+
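+// Applies Clip to each of the vSize elements of the vector.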
+void ClipVector(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ float absLimit,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outResult.Set(Clip(vector.Get(), absLimit));
+ ++vector;
+ ++outResult;
+ }
+ vector -= vSize;
+ outResult -= vSize;
+}
+
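+// Copies vSize elements from the decoder to the encoder, rewinding both afterwards.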
+void CopyVector(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ armnn::Encoder<float>& outResult)
+{
+ for (uint32_t v = 0; v < vSize; v++)
+ {
+ outResult.Set(vector.Get());
+ ++outResult;
+ ++vector;
+ }
+ outResult -= vSize;
+ vector -= vSize;
+}
+
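+// Maps an integer activation code (0=None, 1=ReLU, 3=ReLU6, 4=TanH, 6=Sigmoid)
+// to an ArmNN activation function together with its a/b parameters.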
+void SetActivationParameters(uint32_t activation,
+ armnn::ActivationFunction& outArmnnActivation,
+ float& outA,
+ float& outB)
+{
+ switch (activation)
+ {
+ case 0: // None
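+ // outArmnnActivation is deliberately left unchanged; callers pre-initialise it.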
+ outA = 0;
+ outB = 0;
+ return;
+
+ case 1: // Relu
+ outArmnnActivation = armnn::ActivationFunction::ReLu;
+ outA = 0;
+ outB = 0;
+ return;
+
+ case 3: // Relu6
+ outArmnnActivation = armnn::ActivationFunction::BoundedReLu;
+ outA = 6;
+ outB = 0;
+ return;
+
+ case 4: // Tanh
+ outArmnnActivation = armnn::ActivationFunction::TanH;
+ outA = 1;
+ outB = 1;
+ return;
+
+ case 6: // Sigmoid
+ outArmnnActivation = armnn::ActivationFunction::Sigmoid;
+ outA = 0;
+ outB = 0;
+ return;
+
+ default:
+ throw armnn::Exception("Unsupported activation function: " + std::to_string(activation));
+ }
+}
+
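+// Deep-copies a const tensor handle into a ScopedCpuTensorHandle, or returns nullptr for a null input.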
+std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr)
+{
+ if (!ptr)
+ {
+ return nullptr;
+ }
+
+ return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr);
+}
diff --git a/src/backends/reference/workloads/LstmUtils.hpp b/src/backends/reference/workloads/LstmUtils.hpp
index db02a84a45..f6aff8b69f 100644
--- a/src/backends/reference/workloads/LstmUtils.hpp
+++ b/src/backends/reference/workloads/LstmUtils.hpp
@@ -8,211 +8,81 @@
#include "BaseIterator.hpp"
#include <backendsCommon/CpuTensorHandle.hpp>
-namespace
-{
-
// Helper functions ported from the Android code base
// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+
+void VectorBatchVectorAdd(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ armnn::Decoder<float>& batchVector,
+ uint32_t nBatch,
+ armnn::Encoder<float>& outResult);
+
+// Layer norm for each batch.
+// normalization_epsilon is used in place of a zero variance to avoid division by zero.
+void MeanStddevNormalization(armnn::Decoder<float>& input_vector,
+ armnn::Encoder<float>& output_vector,
+ uint32_t v_size,
+ uint32_t n_batch,
+ float normalization_epsilon);
+
+void ZeroVector(armnn::Encoder<float>& vector,
+ uint32_t vSize);
+
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix,
uint32_t mRows,
uint32_t mCols,
armnn::Decoder<float>& vector,
uint32_t nBatch,
- armnn::Encoder<float>& outResult)
-{
- for (uint32_t b = 0; b < nBatch; b++)
- {
- for (uint32_t r = 0; r < mRows; r++)
- {
- vector += b * mCols;
- for (uint32_t c = 0; c < mCols; c++)
- {
- outResult.Set(outResult.Get() + matrix.Get() * vector.Get());
- ++matrix;
- ++vector;
- }
- outResult += 1;
- vector -= (b+1) * mCols;
- }
- matrix -= (mRows * mCols);
- }
- outResult -= (mRows * nBatch);
-}
+ armnn::Encoder<float>& outResult);
void VectorBatchVectorAssign(armnn::Decoder<float>& vector,
uint32_t vSize,
uint32_t nBatch,
- armnn::Encoder<float>& outBatchVector)
-{
- for (uint32_t b = 0; b < nBatch; b++)
- {
- for (uint32_t v = 0; v < vSize; v++)
- {
- outBatchVector.Set(vector.Get());
- ++outBatchVector;
- ++vector;
- }
- vector -= vSize;
- }
- outBatchVector -= (nBatch * vSize);
-}
+ armnn::Encoder<float>& outBatchVector);
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector,
uint32_t vSize,
armnn::Decoder<float>& batchVector,
uint32_t nBatch,
- armnn::Encoder<float>& outResult)
-{
- for (uint32_t b = 0; b < nBatch; b++)
- {
- for (uint32_t v = 0; v < vSize; v++)
- {
- outResult.Set(outResult.Get() + vector.Get() * batchVector.Get());
- ++outResult;
- ++vector;
- ++batchVector;
- }
- vector -= vSize;
- }
- batchVector -= vSize * nBatch;
- outResult -= vSize * nBatch;
-}
+ armnn::Encoder<float>& outResult);
+
+void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector,
+ uint32_t vSize,
+ armnn::Decoder<float>& batchVector,
+ uint32_t nBatch,
+ armnn::Encoder<float>& outResult);
void Sub1Vector(armnn::Decoder<float>& vector,
uint32_t vSize,
- armnn::Encoder<float>& result)
-{
- for (uint32_t v = 0; v < vSize; v++)
- {
- result.Set(1.0f - vector.Get());
- ++vector;
- ++result;
- }
- vector -= vSize;
- result -= vSize;
-}
+ armnn::Encoder<float>& result);
+
void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1,
armnn::Decoder<float>& vector2,
uint32_t vSize,
- armnn::Encoder<float>& outResult)
-{
- for (uint32_t v = 0; v < vSize; v++)
- {
- outResult.Set(vector1.Get() * vector2.Get());
- ++outResult;
- ++vector1;
- ++vector2;
- }
- outResult -= vSize;
- vector1 -= vSize;
- vector2 -= vSize;
-}
+ armnn::Encoder<float>& outResult);
void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1,
armnn::Decoder<float>& vector2,
uint32_t vSize,
- armnn::Encoder<float>& outResult)
-{
- for (uint32_t v = 0; v < vSize; v++)
- {
- outResult.Set(outResult.Get() + vector1.Get() * vector2.Get());
- ++outResult;
- ++vector1;
- ++vector2;
- }
- outResult -= vSize;
- vector1 -= vSize;
- vector2 -= vSize;
-}
+ armnn::Encoder<float>& outResult);
float Clip(float f,
- float absLimit)
-{
- float result = (absLimit < f) ? absLimit : f;
- result = (-absLimit > result) ? -absLimit : result;
- return result;
-}
+ float absLimit);
void ClipVector(armnn::Decoder<float>& vector,
uint32_t vSize,
float absLimit,
- armnn::Encoder<float>& outResult)
-{
- for (uint32_t v = 0; v < vSize; v++)
- {
- outResult.Set(Clip(vector.Get(), absLimit));
- ++vector;
- ++outResult;
- }
- vector -= vSize;
- outResult -= vSize;
-}
+ armnn::Encoder<float>& outResult);
void CopyVector(armnn::Decoder<float>& vector,
uint32_t vSize,
- armnn::Encoder<float>& outResult)
-{
- for (uint32_t v = 0; v < vSize; v++)
- {
- outResult.Set(vector.Get());
- ++outResult;
- ++vector;
- }
- outResult -= vSize;
- vector -= vSize;
-}
+ armnn::Encoder<float>& outResult);
void SetActivationParameters(uint32_t activation,
armnn::ActivationFunction& outArmnnActivation,
float& outA,
- float& outB)
-{
- switch (activation)
- {
- case 0: // None
- outA = 0;
- outB = 0;
- return;
-
- case 1: // Relu
- outArmnnActivation = armnn::ActivationFunction::ReLu;
- outA = 0;
- outB = 0;
- return;
-
- case 3: // Relu6
- outArmnnActivation = armnn::ActivationFunction::BoundedReLu;
- outA = 6;
- outB = 0;
- return;
-
- case 4: // Tanh
- outArmnnActivation = armnn::ActivationFunction::TanH;
- outA = 1;
- outB = 1;
- return;
-
- case 6: // Sigmoid
- outArmnnActivation = armnn::ActivationFunction::Sigmoid;
- outA = 0;
- outB = 0;
- return;
-
- default:
- throw armnn::Exception("Unsupported activation function: " + std::to_string(activation));
- }
-}
-
-std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr)
-{
- if (!ptr)
- {
- return nullptr;
- }
-
- return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr);
-}
-
-} // anonymous namespace
+ float& outB);
+
+std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr);
diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp
index f8ebc58f6e..70b3443d88 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.cpp
@@ -32,6 +32,10 @@ RefLstmWorkload::RefLstmWorkload(const LstmQueueDescriptor &descriptor, const Wo
, m_OutputGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_OutputGateBias))
, m_ProjectionWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionWeights))
, m_ProjectionBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionBias))
+ , m_InputLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_InputLayerNormWeights))
+ , m_ForgetLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_ForgetLayerNormWeights))
+ , m_CellLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_CellLayerNormWeights))
+ , m_OutputLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_OutputLayerNormWeights))
{}
void RefLstmWorkload::Execute() const
@@ -62,8 +66,9 @@ void RefLstmWorkload::Execute() const
const uint32_t nCell = m_InputToOutputWeightsTensor->GetShape()[0];
const uint32_t nOutput = m_RecurrentToOutputWeightsTensor->GetShape()[1];
- const bool useCifg = m_Data.m_Parameters.m_CifgEnabled;
- const bool usePeephole = m_Data.m_Parameters.m_PeepholeEnabled;
+ const bool useCifg = m_Data.m_Parameters.m_CifgEnabled;
+ const bool usePeephole = m_Data.m_Parameters.m_PeepholeEnabled;
+ const bool useLayerNorm = m_Data.m_Parameters.m_LayerNormEnabled;
// Index the scratch buffers pointers to the global scratch buffer.
std::unique_ptr<Encoder<float>> inputGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
@@ -134,6 +139,26 @@ void RefLstmWorkload::Execute() const
std::unique_ptr<Decoder<float>> projectionWeightsTensor;
std::unique_ptr<Decoder<float>> projectionBiasTensor;
+ std::unique_ptr<Decoder<float>> inputLayerNormWeights;
+ std::unique_ptr<Decoder<float>> forgetLayerNormWeights;
+ std::unique_ptr<Decoder<float>> cellLayerNormWeights;
+ std::unique_ptr<Decoder<float>> outputLayerNormWeights;
+
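+ // Layer norm weight decoders are only created when layer normalization is enabled;
+ // the input gate weights are omitted when CIFG is in use, as there is no input gate.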
+ if (useLayerNorm)
+ {
+ if (!useCifg)
+ {
+ inputLayerNormWeights = MakeDecoder<float>(
+ m_InputLayerNormWeights->GetTensorInfo(), m_InputLayerNormWeights->GetTensor<void>());
+ }
+ forgetLayerNormWeights = MakeDecoder<float>(
+ m_ForgetLayerNormWeights->GetTensorInfo(), m_ForgetLayerNormWeights->GetTensor<void>());
+ cellLayerNormWeights = MakeDecoder<float>(
+ m_CellLayerNormWeights->GetTensorInfo(), m_CellLayerNormWeights->GetTensor<void>());
+ outputLayerNormWeights = MakeDecoder<float>(
+ m_OutputLayerNormWeights->GetTensorInfo(), m_OutputLayerNormWeights->GetTensor<void>());
+ }
+
if (!useCifg)
{
inputToInputWeightsTensor = MakeDecoder<float>(
@@ -169,18 +194,32 @@ void RefLstmWorkload::Execute() const
}
}
- // Initialize scratch buffers with bias.
- if (!useCifg)
+ if (!useLayerNorm)
{
- VectorBatchVectorAssign(*inputGateBiasTensor,
- nCell, nBatch, *inputGateScratch);
+ // Initialize scratch buffers with bias.
+ if (!useCifg)
+ {
+ VectorBatchVectorAssign(*inputGateBiasTensor,
+ nCell, nBatch, *inputGateScratch);
+ }
+ VectorBatchVectorAssign(*forgetGateBiasTensor,
+ nCell, nBatch, *forgetGateScratch);
+ VectorBatchVectorAssign(*cellBiasTensor,
+ nCell, nBatch, *cellScratch);
+ VectorBatchVectorAssign(*outputGateBiasTensor,
+ nCell, nBatch, *outputGateScratch);
+ }
+ else
+ {
+ // Initialize scratch buffers with zeroes.
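+ // With layer normalization the gate bias is added after MeanStddevNormalization,
+ // so the pre-activations must start from zero rather than from the bias.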
+ if (!useCifg)
+ {
+ ZeroVector(*inputGateScratch, nCell * nBatch);
+ }
+ ZeroVector(*forgetGateScratch, nCell * nBatch);
+ ZeroVector(*cellScratch, nCell * nBatch);
+ ZeroVector(*outputGateScratch, nCell * nBatch);
}
- VectorBatchVectorAssign(*forgetGateBiasTensor,
- nCell, nBatch, *forgetGateScratch);
- VectorBatchVectorAssign(*cellBiasTensor,
- nCell, nBatch, *cellScratch);
- VectorBatchVectorAssign(*outputGateBiasTensor,
- nCell, nBatch, *outputGateScratch);
// For each batch and cell: compute input_weight * input.
if (!useCifg)
@@ -216,6 +255,15 @@ void RefLstmWorkload::Execute() const
VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsTensor,
nCell, *cellStateIn, nBatch, *inputGateScratch);
}
+ if (useLayerNorm)
+ {
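+ // Normalise the gate pre-activation, scale it by the per-gate layer norm weights,
+ // then add the gate bias that was deferred during initialisation.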
+ MeanStddevNormalization(*inputGateScratchDecoder,
+ *inputGateScratch, nCell, nBatch, m_LayerNormEpsilon);
+ VectorBatchVectorCwiseProduct(*inputLayerNormWeights,
+ nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
+ VectorBatchVectorAdd(*inputGateBiasTensor,
+ nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
+ }
Activation(*inputGateScratchDecoder, *inputGateScratch,
TensorInfo({nCell, nBatch}, outputType),
ActivationFunction::Sigmoid, 0, 0);
@@ -227,11 +275,30 @@ void RefLstmWorkload::Execute() const
VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsTensor, nCell,
*cellStateIn, nBatch, *forgetGateScratch);
}
+ if (useLayerNorm)
+ {
+ MeanStddevNormalization(*forgetGateScratchDecoder,
+ *forgetGateScratch, nCell, nBatch, m_LayerNormEpsilon);
+ VectorBatchVectorCwiseProduct(*forgetLayerNormWeights,
+ nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
+ VectorBatchVectorAdd(*forgetGateBiasTensor,
+ nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
+ }
Activation(*forgetGateScratchDecoder, *forgetGateScratch,
TensorInfo({nCell, nBatch}, outputType),
ActivationFunction::Sigmoid, 0, 0);
// For each batch and cell: update the cell.
+ if (useLayerNorm)
+ {
+ MeanStddevNormalization(*cellScratchDecoder,
+ *cellScratch, nCell, nBatch, m_LayerNormEpsilon);
+ VectorBatchVectorCwiseProduct(*cellLayerNormWeights,
+ nCell, *cellScratchDecoder, nBatch, *cellScratch);
+ VectorBatchVectorAdd(*cellBiasTensor,
+ nCell, *cellScratchDecoder, nBatch, *cellScratch);
+ }
+
VectorVectorCwiseProduct(*forgetGateScratchDecoder, *cellStateIn, nBatch * nCell, *cellStateOut);
ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid;
@@ -267,6 +334,15 @@ void RefLstmWorkload::Execute() const
VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsTensor,
nCell, *cellStateOutDecoder, nBatch, *outputGateScratch);
}
+ if (useLayerNorm)
+ {
+ MeanStddevNormalization(*outputGateScratchDecoder,
+ *outputGateScratch, nCell, nBatch, m_LayerNormEpsilon);
+ VectorBatchVectorCwiseProduct(*outputLayerNormWeights,
+ nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
+ VectorBatchVectorAdd(*outputGateBiasTensor,
+ nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
+ }
Activation(*outputGateScratchDecoder, *outputGateScratch,
TensorInfo({nCell, nBatch}, outputType),
ActivationFunction::Sigmoid, 0, 0);
diff --git a/src/backends/reference/workloads/RefLstmWorkload.hpp b/src/backends/reference/workloads/RefLstmWorkload.hpp
index 38e3fb956c..ce5a775269 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.hpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.hpp
@@ -38,6 +38,12 @@ private:
std::unique_ptr<ScopedCpuTensorHandle> m_OutputGateBiasTensor;
std::unique_ptr<ScopedCpuTensorHandle> m_ProjectionWeightsTensor;
std::unique_ptr<ScopedCpuTensorHandle> m_ProjectionBiasTensor;
+ std::unique_ptr<ScopedCpuTensorHandle> m_InputLayerNormWeights;
+ std::unique_ptr<ScopedCpuTensorHandle> m_ForgetLayerNormWeights;
+ std::unique_ptr<ScopedCpuTensorHandle> m_CellLayerNormWeights;
+ std::unique_ptr<ScopedCpuTensorHandle> m_OutputLayerNormWeights;
+
+ float m_LayerNormEpsilon = 1e-8f;
};
} //namespace armnn