//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "Graph.hpp"
#include "SubgraphView.hpp"
#include "LayersFwd.hpp"
#include "DotSerializer.hpp"

#include <backendsCommon/IBackendInternal.hpp>
#include <backendsCommon/ITensorHandleFactory.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <boost/assert.hpp>
#include <boost/core/ignore_unused.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>
#include <boost/numeric/conversion/cast.hpp>

#include <algorithm>
#include <sstream>
#include <unordered_map>
#include <unordered_set>

namespace armnn
{

Graph::Graph(const Graph& other)
:   m_LayersInOrder(other.m_LayersInOrder)
{
    std::unordered_map<const Layer*, Layer*> otherToClonedMap;

    for (auto&& otherLayer : other.m_Layers)
    {
        Layer* const layer = otherLayer->Clone(*this);
        otherToClonedMap.emplace(otherLayer, layer);
    }

    // Copies slot connections.
    for (auto&& otherLayer : other.m_Layers)
    {
        Layer* const thisLayer = otherToClonedMap[otherLayer];

        auto outputSlot = thisLayer->BeginOutputSlots();
        for (auto&& otherOutputSlot : otherLayer->GetOutputSlots())
        {
            for (auto&& otherInputSlot : otherOutputSlot.GetConnections())
            {
                const Layer& otherTgtLayer = otherInputSlot->GetOwningLayer();
                Layer* const thisTgtLayer = otherToClonedMap[&otherTgtLayer];

                InputSlot& inputSlot = thisTgtLayer->GetInputSlot(otherInputSlot->GetSlotIndex());
                outputSlot->Connect(inputSlot);
            }
            outputSlot->SetTensorInfo(otherOutputSlot.GetTensorInfo());
            ++outputSlot;
        }
    }
}

Status Graph::Print() const
{
    if (m_Layers.empty())
    {
        BOOST_LOG_TRIVIAL(info) << "\n Graph is empty.\n";
        return Status::Success;
    }
    BOOST_LOG_TRIVIAL(info) << "\n";
    BOOST_LOG_TRIVIAL(info) << "Walking Pattern: \n";

    for (auto&& it : TopologicalSort())
    {
        BOOST_LOG_TRIVIAL(info) << it->GetName() << ":" << GetLayerTypeAsCString(it->GetType())
                                << ":" << it->GetBackendId().Get();
    }
    BOOST_LOG_TRIVIAL(info) << "\n\n";

    return Status::Success;
}

Status Graph::SerializeToDot(std::ostream& stream)
{
    {
        DotGraph graph(stream, "Optimized");

        {
            // Default node attributes:
            DotDefaults nodes(stream, "node");
            nodes.GetAttributeSet()
                 .AddAttribute("shape", "record");
        }

        {
            // Default edge attributes:
            DotDefaults edges(stream, "edge");
            edges.GetAttributeSet()
                 .AddAttribute("fontsize", 8)
                 .AddAttribute("fontcolor", "blue")
                 .AddAttribute("fontname", "arial-bold");
        }

        // First declares the nodes.
        for (auto&& layer : m_Layers)
        {
            DotNode node(stream, layer->GetGuid(), GetLayerTypeAsCString(layer->GetType()));
            // Extracts the layer parameters.
            ParameterStringifyFunction extractParams = [&node](const std::string& name, const std::string& value)
            {
                node.GetContents().AddContent(name + " : " + value);
            };
            layer->SerializeLayerParameters(extractParams);
        }

        // Second declares the edges.
        for (auto&& layer : m_Layers)
        {
            LayerGuid toId = layer->GetGuid();

            for (unsigned int i = 0; i < layer->GetNumInputSlots(); i++)
            {
                OutputSlot* outputSlot = static_cast<OutputSlot*>(layer->GetInputSlot(i).GetConnection());
                LayerGuid fromId = outputSlot->GetOwningLayer().GetGuid();
                DotEdge edge(stream, fromId, toId);

                // Now print the tensor shape on the edge.
                {
                    // Constructs the label attribute with HTML markup.
                    std::stringstream ss;
                    ss << "< " << outputSlot->GetTensorInfo().GetShape() << " >";
                    edge.GetAttributeSet().AddAttribute("label", ss);
                }
            }
        }
    }

    if (stream.bad())
    {
        return Status::Failure;
    }
    return Status::Success;
}
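// Usage sketch (comment only, not compiled): the DOT stream written above can be
// rendered with Graphviz. "optNet" stands for a hypothetical IOptimizedNetwork
// obtained from armnn::Optimize(), whose SerializeToDot() forwards to this
// function on the optimized graph:
//
//     std::ofstream dotStream("graph.dot");
//     optNet->SerializeToDot(dotStream);
//
// and then, from a shell:  dot -Tpng graph.dot -o graph.png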
Status Graph::AllocateDynamicBuffers()
{
    // Layers must be sorted in topological order
    BOOST_ASSERT(m_LayersInOrder);

    std::unordered_set<const ITensorHandle*> preallocatedTensors;
    std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;

    // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
    // is a TensorHandle, the function just returns it.
    auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
    {
        ITensorHandle* ancestor = subTensorHandle;
        while (ancestor && ancestor->GetParent())
        {
            ancestor = ancestor->GetParent();
        }
        return ancestor;
    };

    // Checks whether a TensorHandle has been pre-allocated.
    auto IsPreallocated = [&](ITensorHandle* const tensorHandle)
    {
        return tensorHandle && preallocatedTensors.find(tensorHandle) != preallocatedTensors.end();
    };

    // Constant tensor handles need to last from the beginning of execution till the end,
    // therefore we pre-allocate them upfront.
    for (auto&& layer : m_Layers)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            for (auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
            {
                ITensorHandle* tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());

                if (tensorHandle && !IsPreallocated(tensorHandle))
                {
                    tensorHandle->Allocate();
                    preallocatedTensors.insert(tensorHandle);
                }
            }
        }
    }

    // Iterate over the network in topological order.
    for (auto&& layer : m_Layers)
    {
        // Count the amount of times each output slot references a certain buffer (ITensorHandle).
        // The first time we encounter a new tensor handle, we start managing its lifetime.
        for (auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
        {
            ITensorHandle* tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());

            if (tensorHandle && !IsPreallocated(tensorHandle))
            {
                unsigned int numConnections = slot->GetNumConnections();
                if (handleReferenceCounts.find(tensorHandle) == handleReferenceCounts.end())
                {
                    handleReferenceCounts[tensorHandle] = numConnections;
                    tensorHandle->Manage();
                }
                else
                {
                    handleReferenceCounts[tensorHandle] += numConnections;
                }
            }
        }

        // Loop through the input slots in the same layer and decrement the reference counter associated
        // to each tensor handle we encounter. Once it reaches zero, we end the lifetime of the tensor handle.
        for (auto&& slot = layer->BeginInputSlots(); slot != layer->EndInputSlots(); ++slot)
        {
            ITensorHandle* tensorHandle = TraceSubTensorHandleAncestry(
                slot->GetConnectedOutputSlot()->GetOutputHandler().GetData());

            if (tensorHandle && !IsPreallocated(tensorHandle))
            {
                --handleReferenceCounts[tensorHandle];

                if (handleReferenceCounts[tensorHandle] == 0u)
                {
                    // Stop managing lifetime of tensor handle.
                    tensorHandle->Allocate();
                    handleReferenceCounts.erase(tensorHandle);
                }
            }
        }
    }

    return Status::Success;
}
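// Worked example of the reference-counting scheme above (comment only). For an
// output slot whose buffer feeds two consumers:
//
//     output slot visited  -> handleReferenceCounts[handle] = 2, handle->Manage()
//     consumer 1's input   -> count drops to 1
//     consumer 2's input   -> count drops to 0, handle->Allocate() is called and
//                             the handle is dropped from the map
//
// Manage() and Allocate() thus bracket each intermediate buffer's lifetime in
// topological order, which is what lets a backend memory manager overlap buffers
// whose lifetimes do not intersect.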
const Graph& Graph::TopologicalSort() const
{
    if (!m_LayersInOrder)
    {
        // Resets layer order.
        for (auto&& it : m_Layers)
        {
            it->ResetPriority();
        }

        auto compareLayerPriority = [](const LayerList::value_type& layerA, const LayerList::value_type& layerB)
        {
            return layerA->GetPriority() < layerB->GetPriority();
        };

        m_Layers.sort(compareLayerPriority);

        m_LayersInOrder = true;
    }

    return *this;
}

void Graph::AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
                                   TensorHandleFactoryRegistry& registry)
{
    // Returns true if the given layer could potentially need an intermediate copy/import layer (depending on its
    // connections to other layers).
    auto MayNeedCompatibilityLayer = [](const Layer& layer)
    {
        // All layers should have been associated with a valid compute device at this point.
        BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
        // Does not need another compatibility layer if a copy or import layer is already present.
        return layer.GetType() != LayerType::MemCopy &&
               layer.GetType() != LayerType::MemImport;
    };

    auto IsCompatibilityStrategy = [](EdgeStrategy strategy)
    {
        return strategy == EdgeStrategy::CopyToTarget ||
               strategy == EdgeStrategy::ExportToTarget;
    };

    ForEachLayer([this, &backends, &registry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](Layer* srcLayer)
    {
        BOOST_ASSERT(srcLayer);

        if (!MayNeedCompatibilityLayer(*srcLayer))
        {
            // The current layer does not need copy layers, move to the next one.
            return;
        }

        const std::vector<OutputSlot>& srcOutputSlots = srcLayer->GetOutputSlots();
        for (unsigned int srcOutputIndex = 0; srcOutputIndex < srcOutputSlots.size(); srcOutputIndex++)
        {
            OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex);
            const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections();
            const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.GetEdgeStrategies();
            for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
            {
                InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
                BOOST_ASSERT(dstInputSlot);

                EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex];
                BOOST_ASSERT_MSG(strategy != EdgeStrategy::Undefined,
                                 "Undefined memory strategy found while adding copy layers for compatibility");

                const Layer& dstLayer = dstInputSlot->GetOwningLayer();
                if (MayNeedCompatibilityLayer(dstLayer) &&
                    IsCompatibilityStrategy(strategy))
                {
                    // A copy layer is needed in between the source and destination layers.
                    // Record the operation rather than attempting to modify the graph as we go
                    // (invalidating iterators).
                    const std::string compLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
                                                                 % srcLayer->GetName()
                                                                 % srcOutputIndex
                                                                 % dstLayer.GetName()
                                                                 % dstInputSlot->GetSlotIndex());

                    Layer* compLayer = nullptr;
                    if (strategy == EdgeStrategy::CopyToTarget)
                    {
                        compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str());
                    }
                    else
                    {
                        BOOST_ASSERT_MSG(strategy == EdgeStrategy::ExportToTarget, "Invalid edge strategy found.");
                        compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str());
                    }

                    compLayer->SetBackendId(dstLayer.GetBackendId());

                    OutputSlot& compOutputSlot = compLayer->GetOutputSlot(0);
                    auto backendIt = backends.find(dstLayer.GetBackendId());
                    if (backendIt != backends.end() &&
                        backendIt->second &&
                        backendIt->second->SupportsTensorAllocatorAPI())
                    {
                        auto backend = backendIt->second.get();
                        auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences();
                        bool found = false;

                        for (auto preference : tensorHandleFactoryIds)
                        {
                            auto factory = registry.GetFactory(preference);
                            if (factory)
                            {
                                auto srcPref = srcOutputSlot.GetTensorHandleFactoryId();
                                auto srcFactory = registry.GetFactory(srcPref);

                                if (srcFactory)
                                {
                                    bool canExportImport =
                                        (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;

                                    if (factory->SupportsMapUnmap() || canExportImport)
                                    {
                                        compOutputSlot.SetTensorHandleFactory(preference);
                                        found = true;
                                        break;
                                    }
                                }
                            }
                        }

                        if (!found)
                        {
                            compOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
                        }
                    }
                    else
                    {
                        compOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
                    }

                    // The output strategy of a compatibility layer is always DirectCompatibility.
                    compOutputSlot.SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);

                    // Recalculate the connection index on the previous layer as we have just inserted into it.
                    const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.GetConnections();
                    long newSrcConnectionIndex = std::distance(newSourceConnections.begin(),
                                                               std::find(newSourceConnections.begin(),
                                                                         newSourceConnections.end(),
                                                                         &compLayer->GetInputSlot(0)));

                    // The input strategy of a compatibility layer is always DirectCompatibility.
                    srcOutputSlot.SetEdgeStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex),
                                                  EdgeStrategy::DirectCompatibility);
                }
            }
        }
    });
}
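// Summary of the strategy-to-layer mapping above (comment only). For an edge
// whose producer and consumer sit on different backends:
//
//     EdgeStrategy::CopyToTarget        -> insert a MemCopyLayer (map/unmap copy)
//     EdgeStrategy::ExportToTarget      -> insert a MemImportLayer (zero-copy,
//                                          viable when the source factory's export
//                                          flags intersect the destination
//                                          factory's import flags)
//     EdgeStrategy::DirectCompatibility -> no compatibility layer is needed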
void Graph::SubstituteSubgraph(SubgraphView& subgraph, IConnectableLayer* substituteLayer)
{
    BOOST_ASSERT(substituteLayer != nullptr);

    ReplaceSubgraphConnections(subgraph, substituteLayer);
    EraseSubgraphLayers(subgraph);
}

void Graph::SubstituteSubgraph(SubgraphView& subgraph, const SubgraphView& substituteSubgraph)
{
    // Look through each layer in the new subgraph and add any that are not already a member of this graph.
    substituteSubgraph.ForEachLayer([this](Layer* layer)
    {
        if (std::find(std::begin(m_Layers), std::end(m_Layers), layer) == std::end(m_Layers))
        {
            layer->Reparent(*this, m_Layers.end());
            m_LayersInOrder = false;
        }
    });

    ReplaceSubgraphConnections(subgraph, substituteSubgraph);
    EraseSubgraphLayers(subgraph);
    TopologicalSort();
}

void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, IConnectableLayer* substituteLayer)
{
    BOOST_ASSERT(substituteLayer != nullptr);

    // Create a new sub-graph with only the given layer, using
    // the given sub-graph as a reference of which parent graph to use.
    SubgraphView substituteSubgraph(substituteLayer);
    ReplaceSubgraphConnections(subgraph, substituteSubgraph);
}

void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const SubgraphView& substituteSubgraph)
{
    BOOST_ASSERT_MSG(!substituteSubgraph.GetLayers().empty(),
                     "New sub-graph used for substitution must not be empty");

    const SubgraphView::Layers& substituteSubgraphLayers = substituteSubgraph.GetLayers();
    std::for_each(substituteSubgraphLayers.begin(), substituteSubgraphLayers.end(), [&](Layer* layer)
    {
        BOOST_ASSERT_MSG(std::find(m_Layers.begin(), m_Layers.end(), layer) != m_Layers.end(),
                         "Substitute layer is not a member of graph");
    });

    const SubgraphView::InputSlots& subgraphInputSlots = subgraph.GetInputSlots();
    const SubgraphView::OutputSlots& subgraphOutputSlots = subgraph.GetOutputSlots();

    unsigned int subgraphNumInputSlots = boost::numeric_cast<unsigned int>(subgraphInputSlots.size());
    unsigned int subgraphNumOutputSlots = boost::numeric_cast<unsigned int>(subgraphOutputSlots.size());

    const SubgraphView::InputSlots& substituteSubgraphInputSlots = substituteSubgraph.GetInputSlots();
    const SubgraphView::OutputSlots& substituteSubgraphOutputSlots = substituteSubgraph.GetOutputSlots();

    BOOST_ASSERT(subgraphNumInputSlots == substituteSubgraphInputSlots.size());
    BOOST_ASSERT(subgraphNumOutputSlots == substituteSubgraphOutputSlots.size());

    // Disconnect the sub-graph and replace it with the substitute sub-graph.

    // Step 1: process input slots.
    for (unsigned int inputSlotIdx = 0; inputSlotIdx < subgraphNumInputSlots; ++inputSlotIdx)
    {
        InputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
        BOOST_ASSERT(subgraphInputSlot);

        IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
        BOOST_ASSERT(connectedOutputSlot);
        connectedOutputSlot->Disconnect(*subgraphInputSlot);

        IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
        BOOST_ASSERT(substituteInputSlot);
        connectedOutputSlot->Connect(*substituteInputSlot);
    }

    // Step 2: process output slots.
    for (unsigned int outputSlotIdx = 0; outputSlotIdx < subgraphNumOutputSlots; ++outputSlotIdx)
    {
        OutputSlot* subgraphOutputSlot = subgraphOutputSlots.at(outputSlotIdx);
        BOOST_ASSERT(subgraphOutputSlot);

        OutputSlot* substituteOutputSlot = substituteSubgraphOutputSlots.at(outputSlotIdx);
        BOOST_ASSERT(substituteOutputSlot);
        subgraphOutputSlot->MoveAllConnections(*substituteOutputSlot);
    }
}
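// Illustrative sketch (comment only) of how a substitution is typically driven
// from backend optimization code; "view" and the replacement layer's arguments
// are hypothetical:
//
//     SubgraphView view = ...;                        // layers chosen for replacement
//     IConnectableLayer* replacement =
//         optGraph.AddLayer<PreCompiledLayer>(...);   // backend-specific substitute
//     optGraph.SubstituteSubgraph(view, replacement);
//
// SubstituteSubgraph() rewires the boundary slots via ReplaceSubgraphConnections()
// above and then removes the now-disconnected layers with EraseSubgraphLayers() below.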
void Graph::EraseSubgraphLayers(SubgraphView& subgraph)
{
    for (auto layer : subgraph.GetLayers())
    {
        EraseLayer(layer);
    }
    subgraph.Clear();
}

void Graph::InferTensorInfos()
{
    for (auto&& layer : TopologicalSort())
    {
        for (auto&& input : layer->GetInputSlots())
        {
            boost::ignore_unused(input);
            BOOST_ASSERT_MSG(input.GetConnectedOutputSlot()->IsTensorInfoSet(),
                             "All inputs must have the TensorInfo set at this point.");
        }
        layer->ValidateTensorShapesFromInputs();
    }
}

} // namespace armnn