ArmNN 20.05 - QLstmEndToEndTestImpl.cpp
//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "QLstmEndToEndTestImpl.hpp"

#include "CommonTestUtils.hpp"
#include "EndToEndTestImpl.hpp"

#include <armnn/INetwork.hpp>
#include <armnn/LstmParams.hpp>

#include <boost/test/unit_test.hpp>
namespace
{

// Checks if two values of an arithmetic type are close enough to each other
// with regard to a given tolerance value.
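// The ordered subtraction (larger value minus smaller) keeps the difference
// non-negative without calling std::abs, so the helper also works for
// unsigned arithmetic types, where value1 - value2 could otherwise wrap.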
template<typename T>
typename std::enable_if<std::is_arithmetic<T>::value, bool>::type
IsCloseEnough(T value1, T value2, T tolerance)
{
    if (tolerance < 0)
    {
        throw armnn::InvalidArgumentException("Tolerance cannot be < 0");
    }

    T diff = value1 >= value2 ? static_cast<T>(value1 - value2) : static_cast<T>(value2 - value1);
    return diff <= tolerance;
}

} // anonymous namespace

// Types below (IRuntimePtr, NetworkId, InputTensors, QLstmDescriptor, ...)
// are used unqualified, so pull in the armnn namespace.
using namespace armnn;

void QLstmEndToEnd(const std::vector<armnn::BackendId>& backends)
{
    const unsigned int numBatches = 2;
    const unsigned int inputSize = 5;
    const unsigned int outputSize = 4;
    const unsigned int numUnits = 4;
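    // Projection is disabled below, so the output size matches the number of
    // cell units (outputSize == numUnits).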

    bool cifgEnabled = true;
    bool peepholeEnabled = false;
    bool projectionEnabled = false;
    bool layerNormEnabled = true;
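    // With CIFG (coupled input-forget gate) enabled, the input gate is derived
    // from the forget gate, so no input-gate weights or bias are supplied below.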

    // Scale/Offset quantization info
    const float inputScale = 0.0078125f;
    const int32_t inputOffset = 0;

    const int32_t hiddenStateZeroPoint = 0;
    const float hiddenStateScale = 0.007f;

    // if (!projectionEnabled) outputScale == hiddenStateScale
    const float outputScale = hiddenStateScale;
    const int32_t outputOffset = hiddenStateZeroPoint;

    const float cellStateScale = 3.05176e-05f;
    const int32_t cellStateOffset = 0;

    const float weightsScale = 0.00784314f;
    const int32_t weightsOffset = 0;

    const float layerNormScale = 3.05182e-05f;
    const int32_t layerNormOffset = 0;

    const float biasScale = layerNormScale / 1024;
    const int32_t biasOffset = 0;
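    // Tying biasScale to layerNormScale / 1024 presumably mirrors the
    // fixed-point bias representation of the reference quantized LSTM kernels;
    // the test only requires the scales to be self-consistent.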

    const float inputIntermediateScale = 0.007059f;
    const float forgetIntermediateScale = 0.007812f;
    const float cellIntermediateScale = inputIntermediateScale;
    const float outputIntermediateScale = forgetIntermediateScale;

    const float cellClip = 0.0f;
    const float projectionClip = 0.0f;

    // Weights and bias tensor info
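    // (QLSTM uses 8-bit symmetric weights, 32-bit biases and 16-bit symmetric
    // layer norm weights)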
    const armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                             armnn::DataType::QSymmS8,
                                             weightsScale,
                                             weightsOffset);

    const armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                                 armnn::DataType::QSymmS8,
                                                 weightsScale,
                                                 weightsOffset);

    const armnn::TensorInfo biasInfo({outputSize},
                                     armnn::DataType::Signed32,
                                     biasScale,
                                     biasOffset);

    const armnn::TensorInfo layerNormWeightsInfo({numUnits},
                                                 armnn::DataType::QSymmS16,
                                                 layerNormScale,
                                                 layerNormOffset);
    // Mandatory params
    const std::vector<int8_t> inputToForgetWeightsVector =
        {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64};
    const std::vector<int8_t> inputToCellWeightsTensorVector =
        {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77};
    const std::vector<int8_t> inputToOutputWeightsTensorVector =
        {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51};

    armnn::ConstTensor inputToForgetWeightsTensor(inputWeightsInfo, inputToForgetWeightsVector.data());
    armnn::ConstTensor inputToCellWeightsTensor(inputWeightsInfo, inputToCellWeightsTensorVector.data());
    armnn::ConstTensor inputToOutputWeightsTensor(inputWeightsInfo, inputToOutputWeightsTensorVector.data());

    const std::vector<int8_t> recurrentToForgetWeightsTensorVector =
        {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25, 25, 38, -13, 51};
    const std::vector<int8_t> recurrentToCellWeightsTensorVector =
        {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25, 38, -13, 25, 64};
    const std::vector<int8_t> recurrentToOutputWeightsTensorVector =
        {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25, 13, 64, 25, -38};

    armnn::ConstTensor recurrentToForgetWeightsTensor(recurrentWeightsInfo,
                                                      recurrentToForgetWeightsTensorVector.data());
    armnn::ConstTensor recurrentToCellWeightsTensor(recurrentWeightsInfo,
                                                    recurrentToCellWeightsTensorVector.data());
    armnn::ConstTensor recurrentToOutputWeightsTensor(recurrentWeightsInfo,
                                                      recurrentToOutputWeightsTensorVector.data());

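    // Bias values are raw int32 at biasScale (about 3e-8), hence the large magnitudes.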
    const std::vector<int32_t> forgetGateBiasTensorVector = {2147484, -6442451, -4294968, 2147484};
    const std::vector<int32_t> cellBiasTensorVector = {-1073742, 15461883, 5368709, 1717987};
    const std::vector<int32_t> outputGateBiasTensorVector = {1073742, -214748, 4294968, 2147484};

    armnn::ConstTensor forgetGateBiasTensor(biasInfo, forgetGateBiasTensorVector.data());
    armnn::ConstTensor cellBiasTensor(biasInfo, cellBiasTensorVector.data());
    armnn::ConstTensor outputGateBiasTensor(biasInfo, outputGateBiasTensorVector.data());

    // Layer Norm
    const std::vector<int16_t> forgetLayerNormWeightsVector = {6553, 6553, 13107, 9830};
    const std::vector<int16_t> cellLayerNormWeightsVector = {22937, 6553, 9830, 26214};
    const std::vector<int16_t> outputLayerNormWeightsVector = {19660, 6553, 6553, 16384};

    armnn::ConstTensor forgetLayerNormWeights(layerNormWeightsInfo, forgetLayerNormWeightsVector.data());
    armnn::ConstTensor cellLayerNormWeights(layerNormWeightsInfo, cellLayerNormWeightsVector.data());
    armnn::ConstTensor outputLayerNormWeights(layerNormWeightsInfo, outputLayerNormWeightsVector.data());

    // Set up params
    armnn::LstmInputParams params;
    params.m_InputToForgetWeights = &inputToForgetWeightsTensor;
    params.m_InputToCellWeights = &inputToCellWeightsTensor;
    params.m_InputToOutputWeights = &inputToOutputWeightsTensor;

    params.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
    params.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
    params.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;

    params.m_ForgetGateBias = &forgetGateBiasTensor;
    params.m_CellBias = &cellBiasTensor;
    params.m_OutputGateBias = &outputGateBiasTensor;

    params.m_ForgetLayerNormWeights = &forgetLayerNormWeights;
    params.m_CellLayerNormWeights = &cellLayerNormWeights;
    params.m_OutputLayerNormWeights = &outputLayerNormWeights;
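
    // Input-gate, peephole and projection parameters are left unset: CIFG is
    // enabled and peephole/projection are disabled above.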

    QLstmDescriptor descriptor;
    descriptor.m_CifgEnabled = cifgEnabled;
    descriptor.m_PeepholeEnabled = peepholeEnabled;
    descriptor.m_ProjectionEnabled = projectionEnabled;
    descriptor.m_LayerNormEnabled = layerNormEnabled;

    descriptor.m_CellClip = cellClip;
    descriptor.m_ProjectionClip = projectionClip;

    descriptor.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
    descriptor.m_HiddenStateScale = hiddenStateScale;

    descriptor.m_InputIntermediateScale = inputIntermediateScale;
    descriptor.m_ForgetIntermediateScale = forgetIntermediateScale;
    descriptor.m_CellIntermediateScale = cellIntermediateScale;
    descriptor.m_OutputIntermediateScale = outputIntermediateScale;

    // Input/Output tensor info
    const armnn::TensorInfo inputInfo({numBatches, inputSize},
                                      armnn::DataType::QAsymmS8,
                                      inputScale,
                                      inputOffset);

    const armnn::TensorInfo cellStateInfo({numBatches, numUnits},
                                          armnn::DataType::QSymmS16,
                                          cellStateScale,
                                          cellStateOffset);

    const armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                            armnn::DataType::QAsymmS8,
                                            outputScale,
                                            outputOffset);

    // Input tensor data
    const std::vector<int8_t> inputVector = {90, 102, 13, 26, 38, 102, 13, 26, 51, 64};
    const std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
    const std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};

    // Expected output tensor data
    const std::vector<int8_t> outputStateOutVector = {-15, 21, 14, 20, -15, 15, 5, 27};
    const std::vector<int16_t> cellStateOutVector = {-11692, 9960, 5491, 8861, -9422, 7726, 2056, 13149};
    const std::vector<int8_t> outputVector = {-15, 21, 14, 20, -15, 15, 5, 27};
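    // The output duplicates the updated output (hidden) state, so
    // outputVector matches outputStateOutVector.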

    // Build network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::IConnectableLayer* const input = net->AddInputLayer(0);
    armnn::IConnectableLayer* const outputStateIn = net->AddInputLayer(1);
    armnn::IConnectableLayer* const cellStateIn = net->AddInputLayer(2);

    armnn::IConnectableLayer* const qLstmLayer = net->AddQLstmLayer(descriptor, params, "qLstm");

    armnn::IConnectableLayer* const outputStateOut = net->AddOutputLayer(0);
    armnn::IConnectableLayer* const cellStateOut = net->AddOutputLayer(1);
    armnn::IConnectableLayer* const output = net->AddOutputLayer(2);

    // Connect input/output slots
    Connect(input, qLstmLayer, inputInfo, 0, 0);
    Connect(outputStateIn, qLstmLayer, outputStateInfo, 0, 1);
    Connect(cellStateIn, qLstmLayer, cellStateInfo, 0, 2);

    Connect(qLstmLayer, outputStateOut, outputStateInfo, 0, 0);
    Connect(qLstmLayer, cellStateOut, cellStateInfo, 1, 0);
    Connect(qLstmLayer, output, outputStateInfo, 2, 0);
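
    // QLstm slot layout: inputs are {input, outputStateIn, cellStateIn};
    // outputs are {outputStateOut, cellStateOut, output}.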

    // Create runtime
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Load the network into the runtime
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Push back input tensors
    InputTensors inputTensors;
    inputTensors.reserve(3);

    inputTensors.push_back({0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputVector.data())});
    inputTensors.push_back({1, ConstTensor(runtime->GetInputTensorInfo(netId, 1), outputStateInVector.data())});
    inputTensors.push_back({2, ConstTensor(runtime->GetInputTensorInfo(netId, 2), cellStateInVector.data())});

    // Push back output tensors
    OutputTensors outputTensors;
    outputTensors.reserve(3);

    std::vector<int8_t> outputStateOutResult(outputStateOutVector.size());
    std::vector<int16_t> cellStateOutResult(cellStateOutVector.size());
    std::vector<int8_t> outputResult(outputVector.size());

    outputTensors.push_back({0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputStateOutResult.data())});
    outputTensors.push_back({1, Tensor(runtime->GetOutputTensorInfo(netId, 1), cellStateOutResult.data())});
    outputTensors.push_back({2, Tensor(runtime->GetOutputTensorInfo(netId, 2), outputResult.data())});

    // Execute inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

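    // Compare against expected values, allowing a tolerance of one int8
    // quantization step; cellStateOutResult is captured but not asserted here.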
    constexpr int8_t toleranceInt8 = 1;
    for (unsigned int i = 0u; i < outputStateOutResult.size(); ++i)
    {
        BOOST_TEST(IsCloseEnough(outputStateOutVector[i], outputStateOutResult[i], toleranceInt8));
    }

    for (unsigned int i = 0u; i < outputResult.size(); ++i)
    {
        BOOST_TEST(IsCloseEnough(outputVector[i], outputResult[i], toleranceInt8));
    }
}
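
// A minimal usage sketch (not part of the original file): a backend's
// end-to-end test suite would typically invoke this helper with its backend
// id, for example the reference backend:
//
//     BOOST_AUTO_TEST_CASE(QLstmEndToEndTest)
//     {
//         QLstmEndToEnd(std::vector<armnn::BackendId>{armnn::Compute::CpuRef});
//     }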