// // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace armnn { class Graph; using NetworkImplPtr = std::unique_ptr; /// Private implementation of INetwork. class NetworkImpl { public: NetworkImpl(const NetworkOptions& networkOptions = {}); ~NetworkImpl(); const Graph& GetGraph() const { return *m_Graph; } Status PrintGraph(); IConnectableLayer* AddInputLayer(LayerBindingId id, const char* name = nullptr); IConnectableLayer* AddActivationLayer(const ActivationDescriptor& activationDescriptor, const char* name = nullptr); ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") IConnectableLayer* AddAdditionLayer(const char* name = nullptr); IConnectableLayer* AddArgMinMaxLayer(const ArgMinMaxDescriptor& desc, const char* name = nullptr); IConnectableLayer* AddBatchMatMulLayer(const BatchMatMulDescriptor& desc, const char* name = nullptr); IConnectableLayer* AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc, const ConstTensor& mean, const ConstTensor& variance, const ConstTensor& beta, const ConstTensor& gamma, const char* name = nullptr); IConnectableLayer* AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, const char* name = nullptr); IConnectableLayer* AddBroadcastToLayer(const BroadcastToDescriptor& descriptor, const char* name = nullptr); IConnectableLayer* AddCastLayer(const char* name = nullptr); IConnectableLayer* AddChannelShuffleLayer(const ChannelShuffleDescriptor& channelShuffleDescriptor, const char* name = nullptr); IConnectableLayer* AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor, const char* name = nullptr); IConnectableLayer* AddConcatLayer(const ConcatDescriptor& concatDescriptor, const char* name = nullptr); IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, const char* name = nullptr); IConnectableLayer* AddConvolution3dLayer(const Convolution3dDescriptor& convolution3dDescriptor, const char* name = nullptr); IConnectableLayer* AddConstantLayer(const ConstTensor& input, const char* name = nullptr); IConnectableLayer* AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor, const char* name = nullptr); IConnectableLayer* AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, const char* name = nullptr); IConnectableLayer* AddDequantizeLayer(const char* name = nullptr); IConnectableLayer* AddDetectionPostProcessLayer(const DetectionPostProcessDescriptor& descriptor, const ConstTensor& anchors, const char* name = nullptr); ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") IConnectableLayer* AddDivisionLayer(const char* name = nullptr); IConnectableLayer* AddElementwiseBinaryLayer(const ElementwiseBinaryDescriptor& elementwiseBinaryDescriptor, const char* name = nullptr); IConnectableLayer* AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor, const char* name = nullptr); IConnectableLayer* AddMergeLayer(const char* name = nullptr); IConnectableLayer* AddFillLayer(const FillDescriptor& fillDescriptor, const char* name = nullptr); IConnectableLayer* AddFloorLayer(const char* name = nullptr); IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, const char* name = nullptr); IConnectableLayer* AddFusedLayer(const FusedDescriptor& fusedDescriptor, const char* name = nullptr); IConnectableLayer* AddGatherLayer(const GatherDescriptor& gatherDescriptor, const char* name = nullptr); IConnectableLayer* AddGatherNdLayer(const char* name = nullptr); IConnectableLayer* AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc, const char* name = nullptr); IConnectableLayer* AddL2NormalizationLayer(const L2NormalizationDescriptor& desc, const char* name = nullptr); IConnectableLayer* AddLogSoftmaxLayer(const LogSoftmaxDescriptor& logSoftmaxDescriptor, const char* name = nullptr); IConnectableLayer* AddLogicalBinaryLayer(const LogicalBinaryDescriptor& logicalBinaryDescriptor, const char* name = nullptr); IConnectableLayer* AddLstmLayer(const LstmDescriptor& descriptor, const LstmInputParams& params, const char* name = nullptr); ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") IConnectableLayer* AddMaximumLayer(const char* name = nullptr); IConnectableLayer* AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name = nullptr); ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") IConnectableLayer* AddMinimumLayer(const char* name = nullptr); ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") IConnectableLayer* AddMultiplicationLayer(const char* name = nullptr); IConnectableLayer* AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor, const char* name = nullptr); IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr); IConnectableLayer* AddPadLayer(const PadDescriptor& padDescriptor, const char* name = nullptr); IConnectableLayer* AddPermuteLayer(const PermuteDescriptor& permuteDescriptor, const char* name = nullptr); IConnectableLayer* AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor, const char* name = nullptr); IConnectableLayer* AddPooling3dLayer(const Pooling3dDescriptor& pooling3dDescriptor, const char* name = nullptr); IConnectableLayer* AddPrecompiledLayer(const PreCompiledDescriptor& preCompiledDescriptor, CompiledBlobPtr compiledBlobPtr, const Optional& backend, const char* name = nullptr); IConnectableLayer* AddPreluLayer(const char* name = nullptr); IConnectableLayer* AddQuantizeLayer(const char* name = nullptr); IConnectableLayer* AddQLstmLayer(const QLstmDescriptor& descriptor, const LstmInputParams& params, const char* name = nullptr); IConnectableLayer* AddQuantizedLstmLayer(const QuantizedLstmInputParams& params, const char* name = nullptr); IConnectableLayer* AddRankLayer(const char* name = nullptr); IConnectableLayer* AddReduceLayer(const ReduceDescriptor& reduceDescriptor, const char* name = nullptr); IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name = nullptr); IConnectableLayer* AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, const char* name = nullptr); IConnectableLayer* AddReverseV2Layer(const char* name = nullptr); IConnectableLayer* AddShapeLayer(const char* name = nullptr); IConnectableLayer* AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name = nullptr); IConnectableLayer* AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor, const char* name = nullptr); IConnectableLayer* AddSplitterLayer(const ViewsDescriptor& splitterDescriptor, const char* name = nullptr); IConnectableLayer* AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, const char* name = nullptr); IConnectableLayer* AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor, const char* name = nullptr); IConnectableLayer* AddStackLayer(const StackDescriptor& stackDescriptor, const char* name = nullptr); IConnectableLayer* AddStandInLayer(const StandInDescriptor& descriptor, const char* name = nullptr); IConnectableLayer* AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor, const char* name = nullptr); ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") IConnectableLayer* AddSubtractionLayer(const char* name = nullptr); IConnectableLayer* AddSwitchLayer(const char* name = nullptr); IConnectableLayer* AddTileLayer(const TileDescriptor& tileDescriptor, const char* name = nullptr); IConnectableLayer* AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor, const ConstTensor& weights, const Optional& biases, const char* name = nullptr); IConnectableLayer* AddTransposeLayer(const TransposeDescriptor& transposeDescriptor, const char* name = nullptr); IConnectableLayer* AddUnidirectionalSequenceLstmLayer(const UnidirectionalSequenceLstmDescriptor& descriptor, const LstmInputParams& params, const char* name = nullptr); IConnectableLayer* AddConvertFp16ToFp32Layer(const char* name = nullptr); IConnectableLayer* AddConvertFp32ToFp16Layer(const char* name = nullptr); void ExecuteStrategy(IStrategy& strategy) const; private: bool GetShapeInferenceMethod(); bool GetAllowExpandedDims(); NetworkOptions m_NetworkOptions; std::unique_ptr m_Graph; ModelOptions m_ModelOptions; }; struct OptimizationResult { bool m_Warning; bool m_Error; OptimizationResult(bool warning, bool error) : m_Warning(warning), m_Error(error) {} OptimizationResult() : OptimizationResult(false, false) {} bool IsOk() const { return !m_Warning && !m_Error; } bool IsWarningOnly() const { return m_Warning && !m_Error; } bool IsError() const { return m_Error; } }; using BackendsMap = std::map>; BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry, struct BackendSettings& backendSettings); OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, BackendsMap& backends, TensorHandleFactoryRegistry& registry, bool importEnabled, bool exportEnabled, Optional&> errMessages); OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr, BackendSettings& backendSettings, Graph::Iterator& firstLayer, Graph::Iterator& lastLayer, Optional&> errMessages); OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr, BackendSettings& backendSettings, SubgraphView::IConnectableLayerIterator& firstLayer, SubgraphView::IConnectableLayerIterator& lastLayer, Optional&> errMessages); struct OptimizerOptionsOpaqueImpl { ~OptimizerOptionsOpaqueImpl() = default; explicit OptimizerOptionsOpaqueImpl() : m_ReduceFp32ToFp16(false) , m_Debug(false) , m_DebugToFile(false) , m_ReduceFp32ToBf16(false) , m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly) , m_ImportEnabled(false) , m_ModelOptions() , m_ProfilingEnabled(false) , m_ExportEnabled(false) , m_AllowExpandedDims(false) { } explicit OptimizerOptionsOpaqueImpl(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16, bool importEnabled, ModelOptions modelOptions = {}, bool exportEnabled = false, bool debugToFile = false) : m_ReduceFp32ToFp16(reduceFp32ToFp16) , m_Debug(debug) , m_DebugToFile(debugToFile) , m_ReduceFp32ToBf16(reduceFp32ToBf16) , m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly) , m_ImportEnabled(importEnabled) , m_ModelOptions(modelOptions) , m_ProfilingEnabled(false) , m_ExportEnabled(exportEnabled) , m_AllowExpandedDims(false) { } explicit OptimizerOptionsOpaqueImpl(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16, ShapeInferenceMethod shapeInferenceMethod, bool importEnabled, ModelOptions modelOptions, bool exportEnabled, bool debugToFile, bool allowExpandedDims) : m_ReduceFp32ToFp16(reduceFp32ToFp16) , m_Debug(debug) , m_DebugToFile(debugToFile) , m_ReduceFp32ToBf16(reduceFp32ToBf16) , m_shapeInferenceMethod(shapeInferenceMethod) , m_ImportEnabled(importEnabled) , m_ModelOptions(modelOptions) , m_ProfilingEnabled(false) , m_ExportEnabled(exportEnabled) , m_AllowExpandedDims(allowExpandedDims) { } /// Reduces all Fp32 operators in the model to Fp16 for faster processing. /// If the first preferred backend does not have Fp16 support, this option will be disabled. /// If the value of converted Fp16 is infinity, round to the closest finite Fp16 value. /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers /// between layers that weren't in Fp32 in the first place or if the operator is not supported in Fp16. /// The overhead of these conversions can lead to a slower overall performance if too many conversions are /// required. bool m_ReduceFp32ToFp16 = false; /// Add debug data for easier troubleshooting bool m_Debug = false; /// Pass debug data to separate output files for easier troubleshooting bool m_DebugToFile = false; /// @Note This feature has been replaced by enabling Fast Math in compute library backend options. /// This is currently a placeholder option bool m_ReduceFp32ToBf16 = false; /// Infer output size when not available ShapeInferenceMethod m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly; /// Enable Import bool m_ImportEnabled = false; /// Enable Model Options ModelOptions m_ModelOptions; /// Enable profiling dump of the optimizer phase bool m_ProfilingEnabled = false; /// Enable Export bool m_ExportEnabled = false; /// When calculating tensor sizes, dimensions of size == 1 will be ignored bool m_AllowExpandedDims = false; }; } // namespace armnn