ArmNN
 22.11
TfLiteParser.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "armnn/INetwork.hpp"
8 #include "armnnTfLiteParser/ITfLiteParser.hpp"
9 #include "armnn/Types.hpp"
10 
11 #include <schema_generated.h>
12 #include <functional>
13 #include <unordered_map>
14 #include <vector>
15 
16 #include <tensorflow/lite/version.h>
17 
18 #if TF_MAJOR_VERSION > 2 || (TF_MAJOR_VERSION == 2 && TF_MINOR_VERSION > 3)
19 #define ARMNN_POST_TFLITE_2_3
20 #endif
21 
22 namespace armnnTfLiteParser
23 {
24 
25 class TfLiteParserImpl
26 {
27 public:
28  // Shorthands for TfLite types
29  using ModelPtr = std::unique_ptr<tflite::ModelT>;
30  using SubgraphPtr = std::unique_ptr<tflite::SubGraphT>;
31  using OperatorPtr = std::unique_ptr<tflite::OperatorT>;
32  using OperatorCodePtr = std::unique_ptr<tflite::OperatorCodeT>;
33  using TensorPtr = std::unique_ptr<tflite::TensorT>;
34  using TensorRawPtr = const tflite::TensorT *;
35  using TensorRawPtrVector = std::vector<TensorRawPtr>;
36  using TensorIdRawPtr = std::pair<size_t, TensorRawPtr>;
37  using TensorIdRawPtrVector = std::vector<TensorIdRawPtr>;
38  using BufferPtr = std::unique_ptr<tflite::BufferT>;
39  using BufferRawPtr = const tflite::BufferT *;
40 
41 public:
42  /// Create the network from a flatbuffers binary file on disk
43  armnn::INetworkPtr CreateNetworkFromBinaryFile(const char* graphFile);
44 
45  /// Create the network from a flatbuffers binary
46  armnn::INetworkPtr CreateNetworkFromBinary(const std::vector<uint8_t> & binaryContent);
47 
48 
49  /// Retrieve binding info (layer id and tensor info) for the network input identified by
50  /// the given layer name and subgraph id
51  BindingPointInfo GetNetworkInputBindingInfo(size_t subgraphId,
52  const std::string& name) const;
53 
54  /// Retrieve binding info (layer id and tensor info) for the network output identified by
55  /// the given layer name and subgraph id
56  BindingPointInfo GetNetworkOutputBindingInfo(size_t subgraphId,
57  const std::string& name) const;
58 
59  /// Return the number of subgraphs in the parsed model
60  size_t GetSubgraphCount() const;
61 
62  /// Return the input tensor names for a given subgraph
63  std::vector<std::string> GetSubgraphInputTensorNames(size_t subgraphId) const;
64 
65  /// Return the output tensor names for a given subgraph
66  std::vector<std::string> GetSubgraphOutputTensorNames(size_t subgraphId) const;
67 
68  TfLiteParserImpl(const armnn::Optional<ITfLiteParser::TfLiteParserOptions>& options = armnn::EmptyOptional());
69  ~TfLiteParserImpl() = default;
70 
71 public:
72  // testable helpers
73  armnn::INetworkPtr CreateNetworkFromBinaryAsDynamic(const std::vector<uint8_t>& binaryContent);
74 
75  armnn::INetworkPtr LoadModel(std::unique_ptr<tflite::ModelT> model);
76 
77  static ModelPtr LoadModelFromFile(const char* fileName);
78  static ModelPtr LoadModelFromBinary(const uint8_t* binaryContent, size_t len);
79  static TensorRawPtrVector GetInputs(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex);
80  static TensorRawPtrVector GetOutputs(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex);
81  static TensorIdRawPtrVector GetSubgraphInputs(const ModelPtr& model, size_t subgraphIndex);
82  static TensorIdRawPtrVector GetSubgraphOutputs(const ModelPtr& model, size_t subgraphIndex);
83  static std::vector<int32_t>& GetInputTensorIds(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex);
84  static std::vector<int32_t>& GetOutputTensorIds(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex);
85 
86  static BufferRawPtr GetBuffer(const ModelPtr& model, size_t bufferIndex);
87  static armnn::TensorInfo OutputShapeOfSqueeze(std::vector<uint32_t> squeezeDims,
88  const armnn::TensorInfo& inputTensorInfo);
89  static armnn::TensorInfo OutputShapeOfReshape(const armnn::TensorInfo& inputTensorInfo,
90  const std::vector<int32_t>& targetDimsIn);
91 
92  /// Retrieve version in X.Y.Z form
93  static const std::string GetVersion();
94 
95 private:
96 
97  // No copying allowed until it is wanted and properly implemented
98  TfLiteParserImpl(const TfLiteParserImpl &) = delete;
99  TfLiteParserImpl & operator=(const TfLiteParserImpl &) = delete;
100 
101  /// Create the network from an already loaded flatbuffers model
102  armnn::INetworkPtr CreateNetworkFromModel();
103 
104  // signature for the parser functions
105  using OperatorParsingFunction = void(TfLiteParserImpl::*)(size_t subgraphIndex, size_t operatorIndex);
106 
107  void ParseCustomOperator(size_t subgraphIndex, size_t operatorIndex);
108  void ParseUnsupportedOperator(size_t subgraphIndex, size_t operatorIndex);
109 
110  void ParseAbs(size_t subgraphIndex, size_t operatorIndex);
111  void ParseActivation(size_t subgraphIndex, size_t operatorIndex, armnn::ActivationFunction activationType);
112  void ParseAdd(size_t subgraphIndex, size_t operatorIndex);
113  void ParseArgMinMax(size_t subgraphIndex, size_t operatorIndex, armnn::ArgMinMaxFunction argMinMaxFunction);
114  void ParseArgMin(size_t subgraphIndex, size_t operatorIndex);
115  void ParseArgMax(size_t subgraphIndex, size_t operatorIndex);
116  void ParseAveragePool2D(size_t subgraphIndex, size_t operatorIndex);
117  void ParseBatchMatMul(size_t subgraphIndex, size_t operatorIndex);
118  void ParseBatchToSpaceND(size_t subgraphIndex, size_t operatorIndex);
119  void ParseCast(size_t subgraphIndex, size_t operatorIndex);
120  void ParseComparison(size_t subgraphIndex, size_t operatorIndex, armnn::ComparisonOperation comparisonOperation);
121  void ParseConcatenation(size_t subgraphIndex, size_t operatorIndex);
122  void ParseConv2D(size_t subgraphIndex, size_t operatorIndex);
123  // Conv3D support was added in TF 2.5, so for backwards compatibility a hash define is needed.
124  #if defined(ARMNN_POST_TFLITE_2_4)
125  void ParseConv3D(size_t subgraphIndex, size_t operatorIndex);
126  #endif
127  void ParseDepthToSpace(size_t subgraphIndex, size_t operatorIndex);
128  void ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorIndex);
129  void ParseDequantize(size_t subgraphIndex, size_t operatorIndex);
130  void ParseDetectionPostProcess(size_t subgraphIndex, size_t operatorIndex);
131  void ParseDiv(size_t subgraphIndex, size_t operatorIndex);
132  void ParseElementwiseUnary(size_t subgraphIndex, size_t operatorIndex, armnn::UnaryOperation unaryOperation);
133  void ParseElu(size_t subgraphIndex, size_t operatorIndex);
134  void ParseEqual(size_t subgraphIndex, size_t operatorIndex);
135  void ParseExp(size_t subgraphIndex, size_t operatorIndex);
136  void ParseExpandDims(size_t subgraphIndex, size_t operatorIndex);
137  void ParseFloorDiv(size_t subgraphIndex, size_t operatorIndex);
138  void ParseFullyConnected(size_t subgraphIndex, size_t operatorIndex);
139  void ParseGather(size_t subgraphIndex, size_t operatorIndex);
140  void ParseGatherNd(size_t subgraphIndex, size_t operatorIndex);
141  void ParseGreater(size_t subgraphIndex, size_t operatorIndex);
142  void ParseGreaterOrEqual(size_t subgraphIndex, size_t operatorIndex);
143  void ParseHardSwish(size_t subgraphIndex, size_t operatorIndex);
144  void ParseLeakyRelu(size_t subgraphIndex, size_t operatorIndex);
145  void ParseLess(size_t subgraphIndex, size_t operatorIndex);
146  void ParseLessOrEqual(size_t subgraphIndex, size_t operatorIndex);
147  void ParseLog(size_t subgraphIndex, size_t operatorIndex);
148  void ParseLocalResponseNormalization(size_t subgraphIndex, size_t operatorIndex);
149  void ParseLogicalNot(size_t subgraphIndex, size_t operatorIndex);
150  void ParseLogistic(size_t subgraphIndex, size_t operatorIndex);
151  void ParseLogSoftmax(size_t subgraphIndex, size_t operatorIndex);
152  void ParseL2Normalization(size_t subgraphIndex, size_t operatorIndex);
153  void ParseMaxPool2D(size_t subgraphIndex, size_t operatorIndex);
154  void ParseMaximum(size_t subgraphIndex, size_t operatorIndex);
155  void ParseMean(size_t subgraphIndex, size_t operatorIndex);
156  void ParseMinimum(size_t subgraphIndex, size_t operatorIndex);
157  void ParseMirrorPad(size_t subgraphIndex, size_t operatorIndex);
158  void ParseMul(size_t subgraphIndex, size_t operatorIndex);
159  void ParseNeg(size_t subgraphIndex, size_t operatorIndex);
160  void ParseNotEqual(size_t subgraphIndex, size_t operatorIndex);
161  void ParsePack(size_t subgraphIndex, size_t operatorIndex);
162  void ParsePad(size_t subgraphIndex, size_t operatorIndex);
163  void ParsePool(size_t subgraphIndex, size_t operatorIndex, armnn::PoolingAlgorithm algorithm);
164  void ParsePrelu(size_t subgraphIndex, size_t operatorIndex);
165  void ParseQuantize(size_t subgraphIndex, size_t operatorIndex);
166  void ParseReduce(size_t subgraphIndex, size_t operatorIndex, armnn::ReduceOperation reduceOperation);
167  void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex);
168  void ParseReduceMin(size_t subgraphIndex, size_t operatorIndex);
169  void ParseReduceProd(size_t subgraphIndex, size_t operatorIndex);
170  void ParseRelu(size_t subgraphIndex, size_t operatorIndex);
171  void ParseRelu6(size_t subgraphIndex, size_t operatorIndex);
172  void ParseReshape(size_t subgraphIndex, size_t operatorIndex);
173  void ParseResize(size_t subgraphIndex, size_t operatorIndex, armnn::ResizeMethod resizeMethod);
174  void ParseResizeBilinear(size_t subgraphIndex, size_t operatorIndex);
175  void ParseResizeNearestNeighbor(size_t subgraphIndex, size_t operatorIndex);
176  void ParseRsqrt(size_t subgraphIndex, size_t operatorIndex);
177  void ParseShape(size_t subgraphIndex, size_t operatorIndex);
178  void ParseSin(size_t subgraphIndex, size_t operatorIndex);
179  void ParseSlice(size_t subgraphIndex, size_t operatorIndex);
180  void ParseSoftmax(size_t subgraphIndex, size_t operatorIndex);
181  void ParseSqrt(size_t subgraphIndex, size_t operatorIndex);
182  void ParseSpaceToBatchND(size_t subgraphIndex, size_t operatorIndex);
183  void ParseSplit(size_t subgraphIndex, size_t operatorIndex);
184  void ParseSplitV(size_t subgraphIndex, size_t operatorIndex);
185  void ParseSqueeze(size_t subgraphIndex, size_t operatorIndex);
186  void ParseStridedSlice(size_t subgraphIndex, size_t operatorIndex);
187  void ParseSub(size_t subgraphIndex, size_t operatorIndex);
188  void ParseSum(size_t subgraphIndex, size_t operatorIndex);
189  void ParseTanH(size_t subgraphIndex, size_t operatorIndex);
190  void ParseTranspose(size_t subgraphIndex, size_t operatorIndex);
191  void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex);
192  void ParseUnidirectionalSequenceLSTM(size_t subgraphIndex, size_t operatorIndex);
193  void ParseUnpack(size_t subgraphIndex, size_t operatorIndex);
194 
195  void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot);
196  void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot);
197  void RegisterInputSlots(size_t subgraphIndex,
198  size_t operatorIndex,
199  armnn::IConnectableLayer* layer,
200  const std::vector<unsigned int>& tensorIndexes,
201  unsigned int startingSlotIndex = 0);
202  void RegisterOutputSlots(size_t subgraphIndex,
203  size_t operatorIndex,
204  armnn::IConnectableLayer* layer,
205  const std::vector<unsigned int>& tensorIndexes);
206 
207  void SetupInputLayers(size_t subgraphIndex);
208  void SetupOutputLayers(size_t subgraphIndex);
209  void SetupConstantLayers(size_t subgraphIndex);
210 
211  void ResetParser();
212 
213  void AddBroadcastReshapeLayer(size_t subgraphIndex,
214  size_t operatorIndex,
215  armnn::IConnectableLayer* layer);
216 
217  /// Attach an activation layer to the one passed as a parameter
218  armnn::IConnectableLayer* AddFusedActivationLayer(armnn::IConnectableLayer* layer,
219  unsigned int outputSlot,
220  tflite::ActivationFunctionType activationType);
221 
222  /// Attach a floor layer to the one passed as a parameter
223  armnn::IConnectableLayer* AddFusedFloorLayer(armnn::IConnectableLayer* layer, unsigned int outputSlot);
224 
225  // SupportedDataStorage's purpose is to hold data till we pass over to the network.
226  // We don't care about the content, and we want a single datatype to simplify the code.
227  struct SupportedDataStorage
228  {
229  public:
230  // Convenience constructors
231  SupportedDataStorage(std::unique_ptr<float[]>&& data);
232  SupportedDataStorage(std::unique_ptr<uint8_t[]>&& data);
233  SupportedDataStorage(std::unique_ptr<int8_t[]>&& data);
234  SupportedDataStorage(std::unique_ptr<int32_t[]>&& data);
235 
236  private:
237  // Pointers to the data buffers
238  std::unique_ptr<float[]> m_FloatData;
239  std::unique_ptr<uint8_t[]> m_Uint8Data;
240  std::unique_ptr<int8_t[]> m_Int8Data;
241  std::unique_ptr<int32_t[]> m_Int32Data;
242  };
243 
244  bool ShouldConstantTensorBeCreated(unsigned int tensorIndex);
245  bool IsConstTensor(TensorRawPtr tensorPtr);
246  armnn::ConstTensor CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
247  armnn::TensorInfo& tensorInfo);
248 
249  std::pair<armnn::ConstTensor, SupportedDataStorage>
250  CreateConstTensorPermuted(TensorRawPtr tensorPtr,
251  armnn::TensorInfo& tensorInfo,
252  armnn::Optional<armnn::PermutationVector&> permutationVector);
253  std::pair<armnn::ConstTensor, std::unique_ptr<float[]>>
254  CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
255  armnn::TensorInfo& tensorInfo,
256  armnn::DataType inputDataType);
257 
258  template<typename T>
259  std::pair<armnn::ConstTensor, TfLiteParserImpl::SupportedDataStorage>
260  CreateConstTensorAndStoreData(TfLiteParserImpl::BufferRawPtr bufferPtr,
261  TfLiteParserImpl::TensorRawPtr tensorPtr,
262  armnn::TensorInfo& tensorInfo,
263  armnn::Optional<armnn::PermutationVector&> permutationVector);
264  std::pair<armnn::ConstTensor*, std::unique_ptr<float[]>>
265  CreateConstTensorPtr(TensorRawPtr tensorPtr,
266  armnn::TensorInfo& inputTensorInfo);
267 
268  // Settings for configuring the TfLiteParser
269  armnn::Optional<ITfLiteParser::TfLiteParserOptions> m_Options;
270 
271  /// The network we're building. Gets cleared after it is passed to the user
272  armnn::INetworkPtr m_Network;
273  ModelPtr m_Model;
274 
275  std::vector<OperatorParsingFunction> m_ParserFunctions;
276  std::unordered_map<std::string, OperatorParsingFunction> m_CustomParserFunctions;
277 
278  /// A mapping of an output slot to each of the input slots it should be connected to
279  /// The outputSlot is from the layer that creates this tensor as one of its outputs
280  /// The inputSlots are from the layers that use this tensor as one of their inputs
281  struct TensorSlots
282  {
283  armnn::IOutputSlot* outputSlot;
284  std::vector<armnn::IInputSlot*> inputSlots;
285 
286  TensorSlots() : outputSlot(nullptr) { }
287  };
288  typedef std::vector<TensorSlots> TensorConnections;
289  /// Connections for tensors in each subgraph
290  /// The first index is the subgraph ID, the second index is the tensor ID
291  std::vector<TensorConnections> m_SubgraphConnections;
292 
293  /// This is used in case that the model does not specify the output.
294  /// The shape can be calculated from the options.
295  std::vector<std::vector<unsigned int>> m_OverridenOutputShapes;
296 
297  std::vector<unsigned int> m_ConstantsToDequantize;
298  std::vector<unsigned int> m_ConstantsToBeCreated;
299 };
300 
301 }
std::unique_ptr< tflite::TensorT > TensorPtr
std::unique_ptr< tflite::ModelT > ModelPtr
static TensorIdRawPtrVector GetSubgraphOutputs(const ModelPtr &model, size_t subgraphIndex)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:68
const tflite::TensorT * TensorRawPtr
const tflite::BufferT * BufferRawPtr
BindingPointInfo GetNetworkOutputBindingInfo(size_t subgraphId, const std::string &name) const
Retrieve binding info (layer id and tensor info) for the network output identified by the given layer...
std::vector< std::string > GetSubgraphOutputTensorNames(size_t subgraphId) const
Return the output tensor names for a given subgraph.
std::unique_ptr< tflite::OperatorT > OperatorPtr
static armnn::TensorInfo OutputShapeOfReshape(const armnn::TensorInfo &inputTensorInfo, const std::vector< int32_t > &targetDimsIn)
PoolingAlgorithm
Definition: Types.hpp:136
TfLiteParserImpl(const armnn::Optional< ITfLiteParser::TfLiteParserOptions > &options=armnn::EmptyOptional())
std::unique_ptr< tflite::BufferT > BufferPtr
armnn::INetworkPtr CreateNetworkFromBinary(const std::vector< uint8_t > &binaryContent)
Create the network from a flatbuffers binary.
static BufferRawPtr GetBuffer(const ModelPtr &model, size_t bufferIndex)
armnn::INetworkPtr CreateNetworkFromBinaryFile(const char *graphFile)
Create the network from a flatbuffers binary file on disk.
std::unique_ptr< tflite::OperatorCodeT > OperatorCodePtr
ComparisonOperation
Definition: Types.hpp:108
ReduceOperation
Definition: Types.hpp:143
BindingPointInfo GetNetworkInputBindingInfo(size_t subgraphId, const std::string &name) const
Retrieve binding info (layer id and tensor info) for the network input identified by the given layer ...
static ModelPtr LoadModelFromBinary(const uint8_t *binaryContent, size_t len)
DataType
Definition: Types.hpp:48
std::vector< TensorIdRawPtr > TensorIdRawPtrVector
static std::vector< int32_t > & GetInputTensorIds(const ModelPtr &model, size_t subgraphIndex, size_t operatorIndex)
An output connection slot for a layer.
Definition: INetwork.hpp:41
static const std::string GetVersion()
Retrieve version in X.Y.Z form.
static ModelPtr LoadModelFromFile(const char *fileName)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
std::vector< TensorRawPtr > TensorRawPtrVector
size_t GetSubgraphCount() const
Return the number of subgraphs in the parsed model.
armnn::INetworkPtr LoadModel(std::unique_ptr< tflite::ModelT > model)
std::pair< size_t, TensorRawPtr > TensorIdRawPtr
std::unique_ptr< tflite::SubGraphT > SubgraphPtr
static TensorIdRawPtrVector GetSubgraphInputs(const ModelPtr &model, size_t subgraphIndex)
static TensorRawPtrVector GetInputs(const ModelPtr &model, size_t subgraphIndex, size_t operatorIndex)
static TensorRawPtrVector GetOutputs(const ModelPtr &model, size_t subgraphIndex, size_t operatorIndex)
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
static std::vector< int32_t > & GetOutputTensorIds(const ModelPtr &model, size_t subgraphIndex, size_t operatorIndex)
ArgMinMaxFunction
Definition: Types.hpp:102
ResizeMethod
Definition: Types.hpp:152
UnaryOperation
Definition: Types.hpp:124
armnn::BindingPointInfo BindingPointInfo
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:253
armnn::INetworkPtr CreateNetworkFromBinaryAsDynamic(const std::vector< uint8_t > &binaryContent)
static armnn::TensorInfo OutputShapeOfSqueeze(std::vector< uint32_t > squeezeDims, const armnn::TensorInfo &inputTensorInfo)
std::vector< std::string > GetSubgraphInputTensorNames(size_t subgraphId) const
Return the input tensor names for a given subgraph.
An input connection slot for a layer.
Definition: INetwork.hpp:25
ActivationFunction
Definition: Types.hpp:86