// // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include "armnn/INetwork.hpp" #include "armnnTfLiteParser/ITfLiteParser.hpp" #include "armnn/Types.hpp" #include #include #include #include #include #if TF_MAJOR_VERSION > 2 || (TF_MAJOR_VERSION == 2 && TF_MINOR_VERSION > 3) #define ARMNN_POST_TFLITE_2_3 #endif namespace armnnTfLiteParser { class TfLiteParserImpl { public: // Shorthands for TfLite types using ModelPtr = std::unique_ptr; using SubgraphPtr = std::unique_ptr; using OperatorPtr = std::unique_ptr; using OperatorCodePtr = std::unique_ptr; using TensorPtr = std::unique_ptr; using TensorRawPtr = const tflite::TensorT *; using TensorRawPtrVector = std::vector; using TensorIdRawPtr = std::pair; using TensorIdRawPtrVector = std::vector; using BufferPtr = std::unique_ptr; using BufferRawPtr = const tflite::BufferT *; public: /// Create the network from a flatbuffers binary file on disk armnn::INetworkPtr CreateNetworkFromBinaryFile(const char* graphFile); /// Create the network from a flatbuffers binary armnn::INetworkPtr CreateNetworkFromBinary(const std::vector & binaryContent); /// Retrieve binding info (layer id and tensor info) for the network input identified by /// the given layer name and subgraph id BindingPointInfo GetNetworkInputBindingInfo(size_t subgraphId, const std::string& name) const; /// Retrieve binding info (layer id and tensor info) for the network output identified by /// the given layer name and subgraph id BindingPointInfo GetNetworkOutputBindingInfo(size_t subgraphId, const std::string& name) const; /// Return the number of subgraphs in the parsed model size_t GetSubgraphCount() const; /// Return the input tensor names for a given subgraph std::vector GetSubgraphInputTensorNames(size_t subgraphId) const; /// Return the output tensor names for a given subgraph std::vector GetSubgraphOutputTensorNames(size_t subgraphId) const; TfLiteParserImpl(const armnn::Optional& options = armnn::EmptyOptional()); ~TfLiteParserImpl() = default; public: // testable helpers armnn::INetworkPtr CreateNetworkFromBinaryAsDynamic(const std::vector& binaryContent); armnn::INetworkPtr LoadModel(std::unique_ptr model); static ModelPtr LoadModelFromFile(const char* fileName); static ModelPtr LoadModelFromBinary(const uint8_t* binaryContent, size_t len); static TensorRawPtrVector GetInputs(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex); static TensorRawPtrVector GetOutputs(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex); static TensorIdRawPtrVector GetSubgraphInputs(const ModelPtr& model, size_t subgraphIndex); static TensorIdRawPtrVector GetSubgraphOutputs(const ModelPtr& model, size_t subgraphIndex); static std::vector& GetInputTensorIds(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex); static std::vector& GetOutputTensorIds(const ModelPtr& model, size_t subgraphIndex, size_t operatorIndex); static BufferRawPtr GetBuffer(const ModelPtr& model, size_t bufferIndex); static armnn::TensorInfo OutputShapeOfSqueeze(std::vector squeezeDims, const armnn::TensorInfo& inputTensorInfo); static armnn::TensorInfo OutputShapeOfReshape(const armnn::TensorInfo& inputTensorInfo, const std::vector& targetDimsIn); /// Retrieve version in X.Y.Z form static const std::string GetVersion(); private: // No copying allowed until it is wanted and properly implemented TfLiteParserImpl(const TfLiteParserImpl &) = delete; TfLiteParserImpl & operator=(const TfLiteParserImpl &) = delete; /// Create the network from an already loaded flatbuffers model armnn::INetworkPtr CreateNetworkFromModel(); // signature for the parser functions using OperatorParsingFunction = void(TfLiteParserImpl::*)(size_t subgraphIndex, size_t operatorIndex); void ParseCustomOperator(size_t subgraphIndex, size_t operatorIndex); void ParseUnsupportedOperator(size_t subgraphIndex, size_t operatorIndex); void ParseAbs(size_t subgraphIndex, size_t operatorIndex); void ParseActivation(size_t subgraphIndex, size_t operatorIndex, armnn::ActivationFunction activationType); void ParseAdd(size_t subgraphIndex, size_t operatorIndex); void ParseArgMinMax(size_t subgraphIndex, size_t operatorIndex, armnn::ArgMinMaxFunction argMinMaxFunction); void ParseArgMin(size_t subgraphIndex, size_t operatorIndex); void ParseArgMax(size_t subgraphIndex, size_t operatorIndex); void ParseAveragePool2D(size_t subgraphIndex, size_t operatorIndex); void ParseBatchToSpaceND(size_t subgraphIndex, size_t operatorIndex); void ParseCast(size_t subgraphIndex, size_t operatorIndex); void ParseComparison(size_t subgraphIndex, size_t operatorIndex, armnn::ComparisonOperation comparisonOperation); void ParseConcatenation(size_t subgraphIndex, size_t operatorIndex); void ParseConv2D(size_t subgraphIndex, size_t operatorIndex); // Conv3D support was added in TF 2.5, so for backwards compatibility a hash define is needed. #if defined(ARMNN_POST_TFLITE_2_3) void ParseConv3D(size_t subgraphIndex, size_t operatorIndex); #endif void ParseDepthToSpace(size_t subgraphIndex, size_t operatorIndex); void ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorIndex); void ParseDequantize(size_t subgraphIndex, size_t operatorIndex); void ParseDetectionPostProcess(size_t subgraphIndex, size_t operatorIndex); void ParseDiv(size_t subgraphIndex, size_t operatorIndex); void ParseElementwiseUnary(size_t subgraphIndex, size_t operatorIndex, armnn::UnaryOperation unaryOperation); void ParseElu(size_t subgraphIndex, size_t operatorIndex); void ParseEqual(size_t subgraphIndex, size_t operatorIndex); void ParseExp(size_t subgraphIndex, size_t operatorIndex); void ParseExpandDims(size_t subgraphIndex, size_t operatorIndex); void ParseFloorDiv(size_t subgraphIndex, size_t operatorIndex); void ParseFullyConnected(size_t subgraphIndex, size_t operatorIndex); void ParseGather(size_t subgraphIndex, size_t operatorIndex); void ParseGatherNd(size_t subgraphIndex, size_t operatorIndex); void ParseGreater(size_t subgraphIndex, size_t operatorIndex); void ParseGreaterOrEqual(size_t subgraphIndex, size_t operatorIndex); void ParseHardSwish(size_t subgraphIndex, size_t operatorIndex); void ParseLeakyRelu(size_t subgraphIndex, size_t operatorIndex); void ParseLess(size_t subgraphIndex, size_t operatorIndex); void ParseLessOrEqual(size_t subgraphIndex, size_t operatorIndex); void ParseLocalResponseNormalization(size_t subgraphIndex, size_t operatorIndex); void ParseLogicalNot(size_t subgraphIndex, size_t operatorIndex); void ParseLogistic(size_t subgraphIndex, size_t operatorIndex); void ParseL2Normalization(size_t subgraphIndex, size_t operatorIndex); void ParseMaxPool2D(size_t subgraphIndex, size_t operatorIndex); void ParseMaximum(size_t subgraphIndex, size_t operatorIndex); void ParseMean(size_t subgraphIndex, size_t operatorIndex); void ParseMinimum(size_t subgraphIndex, size_t operatorIndex); void ParseMirrorPad(size_t subgraphIndex, size_t operatorIndex); void ParseMul(size_t subgraphIndex, size_t operatorIndex); void ParseNeg(size_t subgraphIndex, size_t operatorIndex); void ParseNotEqual(size_t subgraphIndex, size_t operatorIndex); void ParsePack(size_t subgraphIndex, size_t operatorIndex); void ParsePad(size_t subgraphIndex, size_t operatorIndex); void ParsePool(size_t subgraphIndex, size_t operatorIndex, armnn::PoolingAlgorithm algorithm); void ParsePrelu(size_t subgraphIndex, size_t operatorIndex); void ParseQuantize(size_t subgraphIndex, size_t operatorIndex); void ParseReduce(size_t subgraphIndex, size_t operatorIndex, armnn::ReduceOperation reduceOperation); void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex); void ParseReduceMin(size_t subgraphIndex, size_t operatorIndex); void ParseReduceProd(size_t subgraphIndex, size_t operatorIndex); void ParseRelu(size_t subgraphIndex, size_t operatorIndex); void ParseRelu6(size_t subgraphIndex, size_t operatorIndex); void ParseReshape(size_t subgraphIndex, size_t operatorIndex); void ParseResize(size_t subgraphIndex, size_t operatorIndex, armnn::ResizeMethod resizeMethod); void ParseResizeBilinear(size_t subgraphIndex, size_t operatorIndex); void ParseResizeNearestNeighbor(size_t subgraphIndex, size_t operatorIndex); void ParseRsqrt(size_t subgraphIndex, size_t operatorIndex); void ParseShape(size_t subgraphIndex, size_t operatorIndex); void ParseSlice(size_t subgraphIndex, size_t operatorIndex); void ParseSoftmax(size_t subgraphIndex, size_t operatorIndex); void ParseSpaceToBatchND(size_t subgraphIndex, size_t operatorIndex); void ParseSplit(size_t subgraphIndex, size_t operatorIndex); void ParseSplitV(size_t subgraphIndex, size_t operatorIndex); void ParseSqueeze(size_t subgraphIndex, size_t operatorIndex); void ParseStridedSlice(size_t subgraphIndex, size_t operatorIndex); void ParseSub(size_t subgraphIndex, size_t operatorIndex); void ParseSum(size_t subgraphIndex, size_t operatorIndex); void ParseTanH(size_t subgraphIndex, size_t operatorIndex); void ParseTranspose(size_t subgraphIndex, size_t operatorIndex); void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex); void ParseUnidirectionalSequenceLSTM(size_t subgraphIndex, size_t operatorIndex); void ParseUnpack(size_t subgraphIndex, size_t operatorIndex); void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot); void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot); void RegisterInputSlots(size_t subgraphIndex, size_t operatorIndex, armnn::IConnectableLayer* layer, const std::vector& tensorIndexes, unsigned int startingSlotIndex = 0); void RegisterOutputSlots(size_t subgraphIndex, size_t operatorIndex, armnn::IConnectableLayer* layer, const std::vector& tensorIndexes); void SetupInputLayers(size_t subgraphIndex); void SetupOutputLayers(size_t subgraphIndex); void SetupConstantLayers(size_t subgraphIndex); void ResetParser(); void AddBroadcastReshapeLayer(size_t subgraphIndex, size_t operatorIndex, armnn::IConnectableLayer* layer); /// Attach an activation layer to the one passed as a parameter armnn::IConnectableLayer* AddFusedActivationLayer(armnn::IConnectableLayer* layer, unsigned int outputSlot, tflite::ActivationFunctionType activationType); /// Attach a floor layer to the one passed as a parameter armnn::IConnectableLayer* AddFusedFloorLayer(armnn::IConnectableLayer* layer, unsigned int outputSlot); // SupportedDataStorage's purpose is to hold data till we pass over to the network. // We don't care about the content, and we want a single datatype to simplify the code. struct SupportedDataStorage { public: // Convenience constructors SupportedDataStorage(std::unique_ptr&& data); SupportedDataStorage(std::unique_ptr&& data); SupportedDataStorage(std::unique_ptr&& data); SupportedDataStorage(std::unique_ptr&& data); private: // Pointers to the data buffers std::unique_ptr m_FloatData; std::unique_ptr m_Uint8Data; std::unique_ptr m_Int8Data; std::unique_ptr m_Int32Data; }; bool ShouldConstantTensorBeCreated(unsigned int tensorIndex); bool IsConstTensor(TensorRawPtr tensorPtr); armnn::ConstTensor CreateConstTensorNonPermuted(TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo); std::pair CreateConstTensorPermuted(TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo, armnn::Optional permutationVector); std::pair> CreateConstTensorNonPermuted(TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo, armnn::DataType inputDataType); template std::pair CreateConstTensorAndStoreData(TfLiteParserImpl::BufferRawPtr bufferPtr, TfLiteParserImpl::TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo, armnn::Optional permutationVector); std::pair> CreateConstTensorPtr(TensorRawPtr tensorPtr, armnn::TensorInfo& inputTensorInfo); // Settings for configuring the TfLiteParser armnn::Optional m_Options; /// The network we're building. Gets cleared after it is passed to the user armnn::INetworkPtr m_Network; ModelPtr m_Model; std::vector m_ParserFunctions; std::unordered_map m_CustomParserFunctions; /// A mapping of an output slot to each of the input slots it should be connected to /// The outputSlot is from the layer that creates this tensor as one of its ouputs /// The inputSlots are from the layers that use this tensor as one of their inputs struct TensorSlots { armnn::IOutputSlot* outputSlot; std::vector inputSlots; TensorSlots() : outputSlot(nullptr) { } }; typedef std::vector TensorConnections; /// Connections for tensors in each subgraph /// The first index is the subgraph ID, the second index is the tensor ID std::vector m_SubgraphConnections; /// This is used in case that the model does not specify the output. /// The shape can be calculated from the options. std::vector> m_OverridenOutputShapes; std::vector m_ConstantsToDequantize; std::vector m_ConstantsToBeCreated; }; }