diff options
-rw-r--r-- | src/armnn/SubgraphViewSelector.cpp | 297 | ||||
-rw-r--r-- | src/armnn/SubgraphViewSelector.hpp | 4 | ||||
-rw-r--r-- | src/armnn/test/SubgraphViewTests.cpp | 524 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp | 75 |
4 files changed, 747 insertions, 153 deletions
diff --git a/src/armnn/SubgraphViewSelector.cpp b/src/armnn/SubgraphViewSelector.cpp index 4357ec4381..8798b7285d 100644 --- a/src/armnn/SubgraphViewSelector.cpp +++ b/src/armnn/SubgraphViewSelector.cpp @@ -9,6 +9,7 @@ #include <algorithm> #include <map> #include <queue> +#include <unordered_set> namespace armnn { @@ -16,28 +17,170 @@ namespace armnn namespace { +/// Intermediate data-structure to store the subgraph that a layer has been assigned to. +/// This is a "disjoint set" data structure that allows efficient merging of subgraphs, +/// which is a key part of the algorithm. Subgraphs are arranged in singly-linked trees +/// (with each node storing a pointer to its parent). Subgraphs in the same tree are considered +/// to have been merged. Merging subgraphs is performed by attaching one tree to another, +/// which is a simple pointer update. +/// +/// NOTE: Due to the way this is stored, it is almost never correct to directly compare pointers +/// to two PartialSubgraphs to check if two layers belong in the same subgraph. Instead you +/// should use IsMergedWith(). +/// +/// This structure also stores information about the dependencies of each subgraph, which is needed +/// to determine whether certain subgraphs can be merged. Checking whether a subgraph +/// depends on another subgraph is a frequent operation in the algorithm (see AssignSplitId) and so this is optimized +/// in preference to the merging of subgraphs. This leads to an approach where each subgraph stores +/// a set of all the subgraphs it depends on (for a fast lookup). In order to efficiently update this +/// set as subgraphs are merged means we also store a set of subgraphs which *depend on us* (i.e. the +/// complement of our dependencies). +class PartialSubgraph +{ +public: + /// If this subgraph has been merged with another then there is an agreed "representative" for the combined + /// subgraph, which uniquely identifies the subgraph. + PartialSubgraph* GetRepresentative() + { + // Recurse up the tree to find the root node. + if (m_Parent == nullptr) + { + return this; + } + else + { + PartialSubgraph* result = m_Parent->GetRepresentative(); + // Update our parent pointer to point directly to the root in order to speed up future calls to this method. + // This essentially "flattens" the tree. + m_Parent = result; + return result; + } + } + + /// Merges this subgraph with another. + void MergeWith(PartialSubgraph* other) + { + if (m_Parent == nullptr) + { + other = other->GetRepresentative(); + if (this == other) + { + // Already merged - no-op + return; + } + m_Parent = other; + + // Update others' dependency sets to point to the new representative rather than us. + // Keeping these up-to-date means we can rely on these sets containing representatives when + // we perform a lookup in HasAntecedent() and so don't need to resolve the representative for each element + // of the set. See description at the top of this class for more rationale. + for (PartialSubgraph* a : m_Antecedents) + { + size_t numErased = a->m_Dependants.erase(this); + BOOST_ASSERT(numErased == 1); + boost::ignore_unused(numErased); + a->m_Dependants.insert(m_Parent); + } + for (PartialSubgraph* a : m_Dependants) + { + size_t numErased = a->m_Antecedents.erase(this); + BOOST_ASSERT(numErased == 1); + boost::ignore_unused(numErased); + a->m_Antecedents.insert(m_Parent); + } + + // Merge our dependency sets into our new representative. + // We no longer need to maintain our own sets, as requests will always be forwarded to the representative. + m_Parent->m_Antecedents.insert(m_Antecedents.begin(), m_Antecedents.end()); + m_Antecedents.clear(); + m_Parent->m_Dependants.insert(m_Dependants.begin(), m_Dependants.end()); + m_Dependants.clear(); + } + else + { + // Defer request to the representative + GetRepresentative()->MergeWith(other); + } + } + + /// Checks if this subgraph has been merged with the given subgraph. + bool IsMergedWith(PartialSubgraph* other) + { + return GetRepresentative() == other->GetRepresentative(); + } + + /// Marks the given subgraph as a direct antecedent (dependency) of this one. + void AddDirectAntecedent(PartialSubgraph* antecedent) + { + if (m_Parent == nullptr) + { + antecedent = antecedent->GetRepresentative(); + + m_Antecedents.insert(antecedent); + // Also record all of its antecedents, so that we end up with direct and indirect antecedents. + // This makes the lookup in HasAntecedent() faster. + m_Antecedents.insert(antecedent->m_Antecedents.begin(), antecedent->m_Antecedents.end()); + // All of our dependents also need to include the new antecedents + for (PartialSubgraph* d : m_Dependants) + { + d->m_Antecedents.insert(antecedent); + d->m_Antecedents.insert(antecedent->m_Antecedents.begin(), antecedent->m_Antecedents.end()); + } + + // Store reverse dependencies as well, required so that we can efficiently navigate the graph + // when making updates. + antecedent->m_Dependants.insert(this); + antecedent->m_Dependants.insert(m_Dependants.begin(), m_Dependants.end()); + for (PartialSubgraph* a : antecedent->m_Antecedents) + { + a->m_Dependants.insert(this); + a->m_Dependants.insert(m_Dependants.begin(), m_Dependants.end()); + } + } + else + { + // Defer request to the representative + GetRepresentative()->AddDirectAntecedent(antecedent); + } + } + + /// Checks if this subgraph is dependent on the given subgraph, either directly or indirectly. + bool HasAntecedent(PartialSubgraph* antecedent) + { + if (m_Parent == nullptr) + { + antecedent = antecedent->GetRepresentative(); + // Thanks to keeping this set updated in MergeWith and AddDirectAntecedent, we can do an efficient lookup. + return m_Antecedents.count(antecedent) > 0; + } + else + { + // Defer request to the representative + return GetRepresentative()->HasAntecedent(antecedent); + } + } + +private: + /// Pointer to the parent node in the tree. If this is null then we are the representative for our merged subgraph. + PartialSubgraph* m_Parent; + /// The representatives of all the subgraphs which we depend on, either directly or indirectly. + std::unordered_set<PartialSubgraph*> m_Antecedents; + /// The representatives of all the subgraphs which depend on us, either directly or indirectly. + std::unordered_set<PartialSubgraph*> m_Dependants; +}; + +/// Intermediate data structure to store information associated with a particular layer. struct LayerSelectionInfo { - using SplitId = uint32_t; using LayerInfoContainer = std::map<Layer*, LayerSelectionInfo>; using LayerInfoQueue = std::queue<LayerSelectionInfo*>; - static constexpr uint32_t InitialSplitId() { return 1; } LayerSelectionInfo(Layer* layer, const SubgraphViewSelector::LayerSelectorFunction& selector) : m_Layer{layer} - , m_SplitId{0} + , m_Subgraph{nullptr} , m_IsSelected{selector(*layer)} , m_IsProcessed(false) { - // fill topology information by storing direct children - for (auto&& slot = m_Layer->BeginOutputSlots(); slot != m_Layer->EndOutputSlots(); ++slot) - { - for (InputSlot* childLayerInputSlot : slot->GetConnections()) - { - Layer& childLayer = childLayerInputSlot->GetOwningLayer(); - m_DirectChildren.push_back(&childLayer); - } - } } bool IsInputLayer() const @@ -57,7 +200,7 @@ struct LayerSelectionInfo Layer& parentLayer = parentLayerOutputSlot->GetOwningLayer(); auto parentInfo = layerInfos.find(&parentLayer); if (parentInfo == layerInfos.end() || - m_SplitId != parentInfo->second.m_SplitId) + !m_Subgraph->IsMergedWith(parentInfo->second.m_Subgraph.get())) { // Avoid collecting duplicate input slots InputSlot* inputSlot = &(*slot); @@ -80,7 +223,7 @@ struct LayerSelectionInfo Layer& childLayer = childLayerInputSlot->GetOwningLayer(); auto childInfo = layerInfos.find(&childLayer); if (childInfo == layerInfos.end() || - m_SplitId != childInfo->second.m_SplitId) + !m_Subgraph->IsMergedWith(childInfo->second.m_Subgraph.get())) { // Avoid collecting duplicate output slots OutputSlot* outputSlot = &(*slot); @@ -93,9 +236,11 @@ struct LayerSelectionInfo } } - std::vector<Layer*> m_DirectChildren; Layer* m_Layer; - SplitId m_SplitId; + /// Which subgraph this layer has been assigned to. Only valid once m_IsProcessed is true. + /// Two layers with different m_Subgraph pointers may in fact have been merged into the same subgraph - + /// see the description of the PartialSubgraph class. + std::shared_ptr<PartialSubgraph> m_Subgraph; bool m_IsSelected; bool m_IsProcessed; }; @@ -155,48 +300,67 @@ void ForEachLayerOutput(LayerSelectionInfo::LayerInfoContainer& layerInfos, void AssignSplitId(LayerSelectionInfo::LayerInfoContainer& layerInfos, LayerSelectionInfo& layerInfo) { - bool newSplit = false; - LayerSelectionInfo::SplitId minSplitId = std::numeric_limits<LayerSelectionInfo::SplitId>::max(); - LayerSelectionInfo::SplitId maxSplitId = std::numeric_limits<LayerSelectionInfo::SplitId>::lowest(); - LayerSelectionInfo::SplitId maxSelectableId = std::numeric_limits<LayerSelectionInfo::SplitId>::lowest(); - - ForEachLayerInput(layerInfos, layerInfo, [&newSplit, &minSplitId, &maxSplitId, &maxSelectableId, &layerInfo]( - LayerSelectionInfo& parentInfo) + // Check each input to see if we can attach ourselves to any of the subgraphs that have already been assigned. + ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo) + { + // We can only attach ourselves to the subgraph from this input if there isn't a cut here. + if (layerInfo.m_IsSelected == parentInfo.m_IsSelected) { - minSplitId = std::min(minSplitId, parentInfo.m_SplitId); - maxSplitId = std::max(maxSplitId, parentInfo.m_SplitId); - if (parentInfo.m_IsSelected && layerInfo.m_IsSelected) + // We also need to check that merging into this subgraph won't cause a dependency cycle between subgraphs. + // This will be the case if the subgraph that we will become part of is already a dependency + // of one of the subgraphs that are input to this layer, e.g: + // + // 0 | The numbers (0, 1) are the subgraph IDs of each layer and we are looking at layer X. + // / \ | + // 1 0 | We can't merge X into subgraph 0, because the left-hand input already depends on subgraph 0. + // \ / | We can however merge X into subgraph 1. + // X | + // + bool dependenciesOk = true; + ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& otherParentInfo) { - maxSelectableId = std::max(maxSelectableId, parentInfo.m_SplitId); - } + // We call HasAntecedent() ~ n^2 times, where n is the number of inputs to this layer. + // Hence it is important that this is efficient - see PartialSubgraph class description. + if (otherParentInfo.m_Subgraph->HasAntecedent(parentInfo.m_Subgraph.get())) + { + dependenciesOk = false; + } + }); - if (layerInfo.m_IsSelected != parentInfo.m_IsSelected) + if (dependenciesOk) { - newSplit = true; + // Merge into the subgraph of this input. If we have already been merged into another subgraph + // (from another input of this layer), then merge both of them together. + if (layerInfo.m_Subgraph == nullptr) + { + layerInfo.m_Subgraph = parentInfo.m_Subgraph; + } + else + { + // We call MergeWith() ~ n times, where n is the number of inputs to this layer. + // Therefore it does not need to be as performant as HasAntecedent(). + layerInfo.m_Subgraph->MergeWith(parentInfo.m_Subgraph.get()); + } } + } + }); - }); + // If we weren't able to merge into an existing subgraph then we need to make a new one + if (layerInfo.m_Subgraph == nullptr) + { + layerInfo.m_Subgraph = std::make_shared<PartialSubgraph>(); + } - // Assign the split Id for the current layerInfo - if (newSplit) + // Record dependencies of the chosen subgraph based on the inputs of this layer. + ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo) { - if (maxSelectableId > minSplitId) + // These functions are called ~n times, where n is the number of inputs to this layer. + // Therefore it does not need to be as performant as HasAntecedent(). + if (!layerInfo.m_Subgraph->IsMergedWith(parentInfo.m_Subgraph.get())) { - // We can be overly aggressive when choosing to create a new split so - // here we determine if one of the parent branches are suitable candidates for continuation instead. - // Any splitId > minSplitId will come from a shorter branch...and therefore should not be from - // the split containing the original fork and thus we avoid the execution dependency. - layerInfo.m_SplitId = maxSelectableId; + layerInfo.m_Subgraph->AddDirectAntecedent(parentInfo.m_Subgraph.get()); } - else - { - layerInfo.m_SplitId = ++maxSplitId; - } - } else - { - // The branch with the highest splitId represents the shortest path of selected nodes. - layerInfo.m_SplitId = maxSplitId; - } + }); } bool IsReadyForSplitAssignment(LayerSelectionInfo::LayerInfoContainer& layerInfos, LayerSelectionInfo& layerInfo) @@ -249,7 +413,6 @@ SubgraphViewSelector::SelectSubgraphs(SubgraphView& subgraph, const LayerSelecto // This layerInfo may have been added to the queue multiple times, so skip if we have already processed it if (!layerInfo.m_IsProcessed) { - // Only process this layerInfo if all inputs have been processed if (!IsReadyForSplitAssignment(layerInfos, layerInfo)) { @@ -272,17 +435,18 @@ SubgraphViewSelector::SelectSubgraphs(SubgraphView& subgraph, const LayerSelecto } } - // Collect all selected layers keyed by split id into a map + // Collect all selected layers keyed by subgraph representative into a map using SelectionInfoPtrs = std::vector<LayerSelectionInfo*>; - std::map<uint32_t, SelectionInfoPtrs> splitMap; + std::map<PartialSubgraph*, SelectionInfoPtrs> splitMap; for (auto& info : layerInfos) { if (info.second.m_IsSelected) { - auto it = splitMap.find(info.second.m_SplitId); + auto it = splitMap.find(info.second.m_Subgraph->GetRepresentative()); if (it == splitMap.end()) { - splitMap.insert(std::make_pair(info.second.m_SplitId, SelectionInfoPtrs{&info.second})); + splitMap.insert( + std::make_pair(info.second.m_Subgraph->GetRepresentative(), SelectionInfoPtrs{&info.second})); } else { @@ -291,26 +455,23 @@ SubgraphViewSelector::SelectSubgraphs(SubgraphView& subgraph, const LayerSelecto } } - // Now each non-empty split id represents a subgraph + // Now each entry in splitMap represents a subgraph Subgraphs result; for (auto& splitGraph : splitMap) { - if (splitGraph.second.empty() == false) + SubgraphView::InputSlots inputs; + SubgraphView::OutputSlots outputs; + SubgraphView::Layers layers; + for (auto&& infoPtr : splitGraph.second) { - SubgraphView::InputSlots inputs; - SubgraphView::OutputSlots outputs; - SubgraphView::Layers layers; - for (auto&& infoPtr : splitGraph.second) - { - infoPtr->CollectNonSelectedInputs(layerInfos, inputs); - infoPtr->CollectNonSelectedOutputSlots(layerInfos, outputs); - layers.push_back(infoPtr->m_Layer); - } - // Create a new sub-graph with the new lists of input/output slots and layer - result.emplace_back(std::make_unique<SubgraphView>(std::move(inputs), - std::move(outputs), - std::move(layers))); + infoPtr->CollectNonSelectedInputs(layerInfos, inputs); + infoPtr->CollectNonSelectedOutputSlots(layerInfos, outputs); + layers.push_back(infoPtr->m_Layer); } + // Create a new sub-graph with the new lists of input/output slots and layer + result.emplace_back(std::make_unique<SubgraphView>(std::move(inputs), + std::move(outputs), + std::move(layers))); } return result; diff --git a/src/armnn/SubgraphViewSelector.hpp b/src/armnn/SubgraphViewSelector.hpp index 9d881faa7c..d4ed8f799a 100644 --- a/src/armnn/SubgraphViewSelector.hpp +++ b/src/armnn/SubgraphViewSelector.hpp @@ -14,6 +14,10 @@ namespace armnn class Layer; class Graph; +/// Algorithm that splits a Graph into Subgraphs based on a filtering of layers (e.g. which layers are appropriate for +/// a certain backend). The resulting subgraphs are guaranteed to be form a DAG (i.e. there are no dependency loops). +/// +/// The algorithm aims to produce as few subgraphs as possible. class SubgraphViewSelector final { public: diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp index 3e762e2de5..0f2c5b3617 100644 --- a/src/armnn/test/SubgraphViewTests.cpp +++ b/src/armnn/test/SubgraphViewTests.cpp @@ -11,7 +11,11 @@ #include <SubgraphViewSelector.hpp> #include <backendsCommon/CpuTensorHandle.hpp> - +#include <fstream> +#include <map> +#include <queue> +#include <random> +#include <chrono> using namespace armnn; namespace @@ -513,16 +517,60 @@ BOOST_AUTO_TEST_CASE(MultipleLayersSelectedInTheMiddle) } } +BOOST_AUTO_TEST_CASE(DisjointGraphs) +{ + // The input graph has two disjoint sections and all layers are selected. + // This should result in two subgraphs being produced. + Graph graph; + + // the graph is constructed in reverse order + auto o0 = graph.AddLayer<OutputLayer>(0, "output0"); + auto n0 = graph.InsertNewLayer<ActivationLayer>(o0->GetInputSlot(0), ActivationDescriptor{}, "intermediate0"); + auto i0 = graph.InsertNewLayer<InputLayer>(n0->GetInputSlot(0), 0, "input0"); + + auto o1 = graph.AddLayer<OutputLayer>(1, "output1"); + auto n1 = graph.InsertNewLayer<ActivationLayer>(o1->GetInputSlot(0), ActivationDescriptor{}, "intermediate1"); + auto i1 = graph.InsertNewLayer<InputLayer>(n1->GetInputSlot(0), 1, "input1"); + + SubgraphViewSelector::Subgraphs subgraphs = + SubgraphViewSelector::SelectSubgraphs(graph, + // select the middle layers only + [](const Layer& l) { + return true; + }); + + // expected results to test against + auto expected1 = CreateSubgraphViewFrom({}, {}, { o0, n0, i0 }); + auto expected2 = CreateSubgraphViewFrom({}, {}, { o1, n1, i1 }); + BOOST_TEST(subgraphs.size() == 2); + if (subgraphs.size() == 2) + { + BOOST_TEST((subgraphs[0] != nullptr)); + BOOST_TEST((subgraphs[1] != nullptr)); + if (subgraphs[0].get() != nullptr && subgraphs[1].get() != nullptr) + { + if (std::find(subgraphs[0]->GetLayers().begin(), subgraphs[0]->GetLayers().end(), i0) != + subgraphs[0]->GetLayers().end()) + { + CompareSubgraphViews(subgraphs[0], expected1); + CompareSubgraphViews(subgraphs[1], expected2); + } + else + { + CompareSubgraphViews(subgraphs[0], expected2); + CompareSubgraphViews(subgraphs[1], expected1); + } + } + } +} + BOOST_AUTO_TEST_CASE(IslandInTheMiddle) { // This case represent the scenario when a non-selected X1 node placed in the middle - // of the selected M* nodes: - // - // X0 -> M1 -> M2 -> M3 -> X2 - // X0 -> M4 -> X1 -> M5 -> X2 - // + // of the selected M* nodes. + // This checks that we don't merge M6 and M3 and create a dependency loop. /* - X0 + M0 / \ M1 M4 | | @@ -530,59 +578,56 @@ BOOST_AUTO_TEST_CASE(IslandInTheMiddle) | | M3 M5 \ / - X2 + M6 */ - // The expected result for this is that M1,M2,M3,M4 will be part of one subgraph and - // M5 will be part of another subgraph and the input and output slots in the subgraphs - // will be set accordingly. - // Graph graph; OriginsDescriptor concatDescriptor(2); - auto x2 = graph.AddLayer<ConcatLayer>(concatDescriptor, "x2"); - auto m3 = graph.InsertNewLayer<ActivationLayer>(x2->GetInputSlot(0), - ActivationDescriptor{}, - "m3"); + auto m6 = graph.AddLayer<ConcatLayer>(concatDescriptor, "m6"); + auto m3 = graph.InsertNewLayer<ActivationLayer>(m6->GetInputSlot(0), + ActivationDescriptor{}, + "m3"); auto m2 = graph.InsertNewLayer<ActivationLayer>(m3->GetInputSlot(0), - ActivationDescriptor{}, - "m2"); + ActivationDescriptor{}, + "m2"); auto m1 = graph.InsertNewLayer<ActivationLayer>(m2->GetInputSlot(0), - ActivationDescriptor{}, - "m1"); - auto x0 = graph.InsertNewLayer<InputLayer>(m1->GetInputSlot(0), 0, "x0"); - - auto m5 = graph.InsertNewLayer<ActivationLayer>(x2->GetInputSlot(1), - ActivationDescriptor{}, - "m5"); - auto x1 = graph.InsertNewLayer<Convolution2dLayer>(m5->GetInputSlot(0), - Convolution2dDescriptor{}, - "x1"); + ActivationDescriptor{}, + "m1"); + auto m0 = graph.InsertNewLayer<InputLayer>(m1->GetInputSlot(0), 0, "m0"); + + auto m5 = graph.InsertNewLayer<ActivationLayer>(m6->GetInputSlot(1), + ActivationDescriptor{}, + "m5"); + auto x1 = graph.InsertNewLayer<ActivationLayer>(m5->GetInputSlot(0), + ActivationDescriptor{}, + "x1"); auto m4 = graph.InsertNewLayer<ActivationLayer>(x1->GetInputSlot(0), - ActivationDescriptor{}, - "m4"); + ActivationDescriptor{}, + "m4"); // Connect the other branch to the input layer - x0->GetOutputSlot(0).Connect(m4->GetInputSlot(0)); + m0->GetOutputSlot(0).Connect(m4->GetInputSlot(0)); // All selected 'M*' layers will be of Activation type SubgraphViewSelector::Subgraphs subgraphs = SubgraphViewSelector::SelectSubgraphs( graph, // select the middle layers only - [](const Layer & l) - { - bool toSelect = (l.GetType() == LayerType::Activation); - return toSelect; - }); + [](const Layer& l) + { + bool toSelect = std::string(l.GetName())[0] == 'm'; + return toSelect; + }); // expected results to test against - auto largerSubgraph = CreateSubgraphViewFrom(CreateInputsFrom({m1, m4}), - CreateOutputsFrom({m3, m4}), - {m1, m4, m2, m3}); + auto largerSubgraph = CreateSubgraphViewFrom(CreateInputsFrom({ m0 }), + CreateOutputsFrom({ m3, m4 }), + { m0, m1, m2, m3, m4 }); - auto smallerSubgraph = CreateSubgraphViewFrom(CreateInputsFrom({m5}), - CreateOutputsFrom({m5}), - {m5}); + auto smallerSubgraph = + CreateSubgraphViewFrom(std::vector<InputSlot*>{ &m5->GetInputSlot(0), & m6->GetInputSlot(0) }, + std::vector<OutputSlot*>{}, + { m5, m6 }); BOOST_TEST(subgraphs.size() == 2); if (subgraphs.size() == 2) @@ -595,15 +640,14 @@ BOOST_AUTO_TEST_CASE(IslandInTheMiddle) { // sort the subgraphs by layer size, so it is simpler to test std::sort(subgraphs.begin(), subgraphs.end(), - [](SubgraphViewSelector::SubgraphViewPtr & lhs, SubgraphViewSelector::SubgraphViewPtr & rhs) - { - return (lhs->GetLayers().size() < rhs->GetLayers().size()); - } + [](SubgraphViewSelector::SubgraphViewPtr& lhs, SubgraphViewSelector::SubgraphViewPtr& rhs) + { + return (lhs->GetLayers().size() < rhs->GetLayers().size()); + } ); - // one subgraph needs to be size=1 and the other one is 4 - BOOST_TEST(subgraphs[0]->GetLayers().size() == 1); - BOOST_TEST(subgraphs[1]->GetLayers().size() == 4); + BOOST_TEST(subgraphs[0]->GetLayers().size() == 2); + BOOST_TEST(subgraphs[1]->GetLayers().size() == 5); CompareSubgraphViews(subgraphs[0], smallerSubgraph); CompareSubgraphViews(subgraphs[1], largerSubgraph); @@ -804,7 +848,7 @@ BOOST_AUTO_TEST_CASE(SingleInputMultiOutput) Layer* layerX2 = graph.AddLayer<OutputLayer>(0, "layerX2"); Layer* layerX3 = graph.AddLayer<OutputLayer>(1, "layerX3"); - // X2 + // X1 // | // M1 // /| @@ -907,6 +951,386 @@ BOOST_AUTO_TEST_CASE(MultiInputMultiOutput) } } +BOOST_AUTO_TEST_CASE(ValidMerge) +{ + // Checks that a node that has multiple choices for merge candidates (M3 in this case) correctly merges with the + // one that it can (M0), and doesn't merge with the ones it can't (X2 and M2). + // + // X1 + // | + // M1 + // / \' + // X2 M2 M0 + // \ | / + // M3 + // + Graph graph; + + ActivationDescriptor activationDefaults; + OriginsDescriptor concatDescriptor(3); + + auto x1 = graph.AddLayer<InputLayer>(0, "x1"); + auto x2 = graph.AddLayer<ActivationLayer>(activationDefaults, "x2"); + auto m0 = graph.AddLayer<InputLayer>(1, "m0"); + auto m1 = graph.AddLayer<ActivationLayer>(activationDefaults, "m1"); + auto m2 = graph.AddLayer<ActivationLayer>(activationDefaults, "m2"); + auto m3 = graph.AddLayer<ConcatLayer>(concatDescriptor, "m3"); + + x1->GetOutputSlot(0).Connect(m1->GetInputSlot(0)); + m1->GetOutputSlot(0).Connect(x2->GetInputSlot(0)); + m1->GetOutputSlot(0).Connect(m2->GetInputSlot(0)); + x2->GetOutputSlot(0).Connect(m3->GetInputSlot(0)); + m2->GetOutputSlot(0).Connect(m3->GetInputSlot(1)); + m0->GetOutputSlot(0).Connect(m3->GetInputSlot(2)); + + SubgraphViewSelector::Subgraphs subgraphs = SubgraphViewSelector::SelectSubgraphs( + graph, + [](const Layer& l) { + return std::string(l.GetName())[0] == 'm'; + }); + + // expected results to test against + auto expectedSubgraph0 = + CreateSubgraphViewFrom( + CreateInputsFrom({ m1 }), + std::vector<OutputSlot*>{ &m1->GetOutputSlot(0), &m2->GetOutputSlot(0) }, + { m1, m2 }); + + auto expectedSubgraph1 = CreateSubgraphViewFrom( + std::vector<InputSlot*>{ &m3->GetInputSlot(0), & m3->GetInputSlot(1) }, + CreateOutputsFrom({ }), + { m0, m3 }); + + BOOST_TEST(subgraphs.size() == 2); + if (subgraphs.size() == 2) + { + // we need to have valid subgraph pointers here + BOOST_TEST((subgraphs[0] != nullptr)); + BOOST_TEST((subgraphs[1] != nullptr)); + + if (subgraphs[0].get() != nullptr && subgraphs[1].get() != nullptr) + { + if (subgraphs[0]->GetInputSlots().size() == 1) + { + CompareSubgraphViews(subgraphs[0], expectedSubgraph0); + CompareSubgraphViews(subgraphs[1], expectedSubgraph1); + } + else + { + CompareSubgraphViews(subgraphs[0], expectedSubgraph1); + CompareSubgraphViews(subgraphs[1], expectedSubgraph0); + } + } + } +} + +BOOST_AUTO_TEST_CASE(PropagatedDependencies) +{ + // Version of IslandInTheMiddle with longer chain + // to make sure antecedents are propagated. + /* + M0 + / \ + M1 M4 + | | + M2 X1 < the island in the middle ! + | | + | M10 + | | + | X2 < another island in the middle ! + | | + M3 M5 + \ / + M6 + */ + Graph graph; + + OriginsDescriptor concatDescriptor(2); + auto m6 = graph.AddLayer<ConcatLayer>(concatDescriptor, "m6"); + auto m3 = graph.InsertNewLayer<ActivationLayer>(m6->GetInputSlot(0), + ActivationDescriptor{}, + "m3"); + auto m2 = graph.InsertNewLayer<ActivationLayer>(m3->GetInputSlot(0), + ActivationDescriptor{}, + "m2"); + auto m1 = graph.InsertNewLayer<ActivationLayer>(m2->GetInputSlot(0), + ActivationDescriptor{}, + "m1"); + auto m0 = graph.InsertNewLayer<InputLayer>(m1->GetInputSlot(0), 0, "m0"); + + auto m5 = graph.InsertNewLayer<ActivationLayer>(m6->GetInputSlot(1), + ActivationDescriptor{}, + "m5"); + auto x2 = graph.InsertNewLayer<ActivationLayer>(m5->GetInputSlot(0), ActivationDescriptor{}, "x2"); + auto m10 = graph.InsertNewLayer<ActivationLayer>(x2->GetInputSlot(0), ActivationDescriptor{}, "m10"); + auto x1 = graph.InsertNewLayer<ActivationLayer>(m10->GetInputSlot(0), + ActivationDescriptor{}, + "x1"); + auto m4 = graph.InsertNewLayer<ActivationLayer>(x1->GetInputSlot(0), + ActivationDescriptor{}, + "m4"); + + // Connect the other branch to the input layer + m0->GetOutputSlot(0).Connect(m4->GetInputSlot(0)); + + // All selected 'M*' layers will be of Activation type + SubgraphViewSelector::Subgraphs subgraphs = + SubgraphViewSelector::SelectSubgraphs( + graph, + // select the middle layers only + [](const Layer& l) + { + bool toSelect = std::string(l.GetName())[0] == 'm'; + return toSelect; + }); + + // expected results to test against + auto largerSubgraph = CreateSubgraphViewFrom(CreateInputsFrom({ m0 }), + CreateOutputsFrom({ m3, m4 }), + { m0, m1, m2, m3, m4 }); + + auto mediumSubgraph = CreateSubgraphViewFrom(std::vector<InputSlot*>{ &m5->GetInputSlot(0), &m6->GetInputSlot(0) }, + std::vector<OutputSlot*>{}, { m5, m6 }); + + auto smallerSubgraph = + CreateSubgraphViewFrom(CreateInputsFrom({ m10 }), CreateOutputsFrom({ m10 }), { m10 }); + + BOOST_TEST(subgraphs.size() == 3); + if (subgraphs.size() == 3) + { + // we need to have valid subgraph pointers here + BOOST_TEST((subgraphs[0] != nullptr)); + BOOST_TEST((subgraphs[1] != nullptr)); + BOOST_TEST((subgraphs[2] != nullptr)); + + if (subgraphs[0].get() != nullptr && subgraphs[1].get() != nullptr && subgraphs[2].get() != nullptr) + { + // sort the subgraphs by layer size, so it is simpler to test + std::sort(subgraphs.begin(), subgraphs.end(), + [](SubgraphViewSelector::SubgraphViewPtr& lhs, SubgraphViewSelector::SubgraphViewPtr& rhs) + { + return (lhs->GetLayers().size() < rhs->GetLayers().size()); + } + ); + + CompareSubgraphViews(subgraphs[0], smallerSubgraph); + CompareSubgraphViews(subgraphs[1], mediumSubgraph); + CompareSubgraphViews(subgraphs[2], largerSubgraph); + } + } +} + +BOOST_AUTO_TEST_CASE(Random) +{ + // Creates random networks, splits them into subgraphs and checks the resulting subgraphs obey the required + // dependency rules. We can easily generate very large networks which helps cover corner cases the other + // small, manually crafted tests have missed. We can also use this to measure performance on large networks. + constexpr bool debug = false; // Enable this to dump dot files and performance timings. + + std::mt19937 randomGenerator; + + // Helper function to get a random number in [0, maxExclusive) + auto GetRandom = [&randomGenerator](auto maxExclusive) { + // Note we could use uniform_int_distribution here, but that gives inconsistent results across platforms + // which makes it harder to reproduce results. + // It appears that uniform_real_distribution is consistent across MSVC and gcc so we use that and round it. + std::uniform_real_distribution<float> uniform(0.0f, 1.0f); + return static_cast<decltype(maxExclusive)>(uniform(randomGenerator) * static_cast<float>(maxExclusive)); + }; + // Helper function to get a bool that has probability 'trueProb' of being true. + auto GetRandomFlag = [&randomGenerator](float trueProb) { + std::uniform_real_distribution<float> uniform(0.0f, 1.0f); + return uniform(randomGenerator) < trueProb; + }; + + constexpr uint32_t numTests = 100; + for (uint32_t testIdx = 0; testIdx < numTests; ++testIdx) + { + randomGenerator.seed(testIdx); // Set a deterministic seed for reproducibility. + + // Create random graph + Graph graph; + { + // First add the layers, without any connections. The following random constants determine the number of + // each layer to add, along with the chance that each layer will be 'supported' (i.e. selected for + // inclusion in the resulting subgraphs). + uint32_t numInputs = 1 + GetRandom(4u); + uint32_t numConstants = 1 + GetRandom(4u); + uint32_t numOutputs = 1 + GetRandom(4u); + uint32_t numConcats = 0 + GetRandom(500u); + uint32_t numSplits = 0 + GetRandom(500u); + float supportedProb = 0.7f; + + for (uint32_t i = 0; i < numInputs; ++i) + { + std::string name = "input" + std::to_string(i) + (GetRandomFlag(supportedProb) ? "S" : "N"); + graph.AddLayer<InputLayer>(static_cast<LayerBindingId>(i), name.c_str()); + } + for (uint32_t i = 0; i < numConstants; ++i) + { + std::string name = "constant" + std::to_string(i) + (GetRandomFlag(supportedProb) ? "S" : "N"); + graph.AddLayer<ConstantLayer>(name.c_str()); + } + for (uint32_t i = 0; i < numOutputs; ++i) + { + std::string name = "output" + std::to_string(i) + (GetRandomFlag(supportedProb) ? "S" : "N"); + graph.AddLayer<OutputLayer>(static_cast<LayerBindingId>(i), name.c_str()); + } + for (uint32_t i = 0; i < numConcats; ++i) + { + std::string name = "concat" + std::to_string(i) + (GetRandomFlag(supportedProb) ? "S" : "N"); + uint32_t numInputs = 1 + GetRandom(3u); + OriginsDescriptor concatDesc(numInputs); + graph.AddLayer<ConcatLayer>(concatDesc, name.c_str()); + } + for (uint32_t i = 0; i < numSplits; ++i) + { + std::string name = "split" + std::to_string(i) + (GetRandomFlag(supportedProb) ? "S" : "N"); + uint32_t numOutputs = 1 + GetRandom(3u); + ViewsDescriptor splitDesc(numOutputs); + graph.AddLayer<SplitterLayer>(splitDesc, name.c_str()); + } + + // Associate each layer with a "depth" parameter. This is used when creating connections to ensure + // that we don't have any loops, by only connecting to layers with a lower "depth". + // This can be thought of as distance from the "top" of the graph (assuming the graph flows top-to-bottom). + // Unfortunately this approach ends up producing very "wide" graphs, + // which probably isn't very representative of 'real' networks. + uint32_t maxLayerDepth = 5 + GetRandom(2000u); + std::map<Layer*, uint32_t> layerDepths; + std::map<uint32_t, std::vector<Layer*>> layersAtDepth; + for (Layer* layer : graph) + { + uint32_t depth; + if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::Constant) + { + // There needs to be at least one input-like layer above everything else, otherwise would be + // nothing for them to connect to! + depth = 0; + } + else + { + // Other layers are randomly assigned to later depths. + depth = 1 + GetRandom(maxLayerDepth); + } + layerDepths[layer] = depth; + layersAtDepth[depth].push_back(layer); + } + + // Connect layers to each other. Every input slot of every layer must be connected, but it doesn't + // matter if an output slot goes unused. + for (Layer* layer : graph) + { + for (uint32_t inputSlotIdx = 0; inputSlotIdx < layer->GetNumInputSlots(); ++inputSlotIdx) + { + InputSlot& inputSlot = layer->GetInputSlot(inputSlotIdx); + uint32_t maxLayerDepthToConnectTo = layerDepths[layer]; // This prevents a connection causing a loop + // Finding a layer to connect to may take multiple attempts, so keep trying until it works. + while (inputSlot.GetConnectedOutputSlot() == nullptr) + { + uint32_t layerDepth = GetRandom(maxLayerDepthToConnectTo); + const std::vector<Layer*>& layersToChooseFrom = layersAtDepth[layerDepth]; + if (layersToChooseFrom.size() == 0) + { + continue; + } + Layer* layerToConnectWith = layersToChooseFrom[GetRandom(layersToChooseFrom.size())]; + if (layerToConnectWith->GetNumOutputSlots() == 0) + { + continue; + } + uint32_t outputSlotIdx = GetRandom(layerToConnectWith->GetNumOutputSlots()); + layerToConnectWith->GetOutputSlot(outputSlotIdx).Connect(inputSlot); + } + } + } + } + + if (debug) + { + std::ofstream f("INPUT_" + std::to_string(testIdx) + ".dot"); + graph.SerializeToDot(f); + } + + // Run the splitting algorithm, selecting all nodes ending in an 'S' (as randomly assigned above). + auto startTime = std::chrono::high_resolution_clock::now(); + + SubgraphViewSelector::Subgraphs subgraphs = + SubgraphViewSelector::SelectSubgraphs(graph, + [](const Layer& l) { return std::string(l.GetName()).back() == 'S'; }); + + auto endTime = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast<std::chrono::microseconds>(endTime - startTime); + if (debug) + { + std::cout << "Test " << testIdx << ": " << duration.count() << " microseconds" << std::endl; + } + + // Build a map of which subgraph is assigned to each layer. + // This helps some of the following code. + std::map<Layer*, SubgraphView*> layerToSubgraph; + for (Layer* layer : graph) + { + size_t i = 0; + for (std::unique_ptr<SubgraphView>& subgraph : subgraphs) + { + std::string name = std::to_string(i++); + if (std::find(subgraph->begin(), subgraph->end(), layer) != subgraph->end()) + { + layerToSubgraph[layer] = subgraph.get(); + break; + } + } + } + + if (debug) + { + // Before dumping the dot file, set each Layer's BackendId property so that the dot file + // shows the resulting subgraph assignments. + for (Layer* layer : graph) + { + std::string name = "NotAssigned"; + auto subgraphIt = layerToSubgraph.find(layer); + if (subgraphIt != layerToSubgraph.end()) + { + auto subgraphIdx = std::distance(subgraphs.begin(), + std::find_if(subgraphs.begin(), subgraphs.end(), + [&](auto& s) { return s.get() == subgraphIt->second; })); + name = std::to_string(subgraphIdx); + } + layer->SetBackendId(armnn::BackendId(name)); + } + + std::ofstream f("GRAPH_" + std::to_string(testIdx) + ".dot"); + graph.SerializeToDot(f); + } + + // Check the dependencies between subgraphs to make sure that the algorithm has produced a valid result. + // Starting from each of the input slots of each subgraph, recurse up the graph and ensure that we never + // encounter a layer that belongs to the subgraph that we started from. + for (std::unique_ptr<SubgraphView>& subgraph : subgraphs) + { + for (InputSlot* inputSlot : subgraph->GetInputSlots()) + { + std::queue<Layer*> toProcess; + toProcess.push(&inputSlot->GetConnectedOutputSlot()->GetOwningLayer()); + while (toProcess.size() > 0) + { + Layer* l = toProcess.front(); + toProcess.pop(); + + BOOST_CHECK(layerToSubgraph[l] != subgraph.get()); + + for (const InputSlot& is : l->GetInputSlots()) + { + toProcess.push(&is.GetConnectedOutputSlot()->GetOwningLayer()); + } + } + } + } + } +} + BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE(IntegrationTests) diff --git a/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp b/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp index 7cb5ded773..ca3c563757 100644 --- a/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp +++ b/src/backends/backendsCommon/test/OptimizeSubgraphViewTests.cpp @@ -877,8 +877,13 @@ void PartiallySupportedSubgraphTestImpl() // Check the substitutions // ----------------------- - const OptimizationViews::Substitutions& substitutions = optimizationViews.GetSubstitutions(); + OptimizationViews::Substitutions substitutions = optimizationViews.GetSubstitutions(); BOOST_TEST(substitutions.size() == 2); + // Sort into a consistent order + std::sort(substitutions.begin(), substitutions.end(), [](auto s1, auto s2) { + return strcmp(s1.m_SubstitutableSubgraph.GetLayers().front()->GetName(), + s2.m_SubstitutableSubgraph.GetLayers().front()->GetName()) < 0; + }); std::vector<ExpectedSubgraphSize> expectedSubstitutableSubgraphSizes{ { 1, 1, 1 }, { 1, 1, 1 } }; @@ -914,8 +919,12 @@ void PartiallySupportedSubgraphTestImpl() // Check the failed subgraphs // -------------------------- - const OptimizationViews::Subgraphs& failedSubgraphs = optimizationViews.GetFailedSubgraphs(); + OptimizationViews::Subgraphs failedSubgraphs = optimizationViews.GetFailedSubgraphs(); BOOST_TEST(failedSubgraphs.size() == 2); + // Sort into a consistent order + std::sort(failedSubgraphs.begin(), failedSubgraphs.end(), [](auto s1, auto s2) { + return strcmp(s1.GetLayers().front()->GetName(), s2.GetLayers().front()->GetName()) < 0; + }); std::vector<ExpectedSubgraphSize> expectedFailedSubgraphSizes{ { 1, 1, 2 }, { 1, 1, 1 } }; @@ -1060,8 +1069,12 @@ void PartiallyOptimizableSubgraphTestImpl1() // Check the substitutions // ----------------------- - const OptimizationViews::Substitutions& substitutions = optimizationViews.GetSubstitutions(); + OptimizationViews::Substitutions substitutions = optimizationViews.GetSubstitutions(); BOOST_TEST(substitutions.size() == 3); + // Sort into a consistent order + std::sort(substitutions.begin(), substitutions.end(), + [](auto s1, auto s2) { return strcmp(s1.m_SubstitutableSubgraph.GetLayers().front()->GetName(), + s2.m_SubstitutableSubgraph.GetLayers().front()->GetName()) < 0; }); std::vector<ExpectedSubgraphSize> expectedSubstitutableSubgraphSizes{ { 1, 1, 1 }, { 1, 1, 1 }, @@ -1108,8 +1121,12 @@ void PartiallyOptimizableSubgraphTestImpl1() // Check the untouched subgraphs // ----------------------------- - const OptimizationViews::Subgraphs& untouchedSubgraphs = optimizationViews.GetUntouchedSubgraphs(); + OptimizationViews::Subgraphs untouchedSubgraphs = optimizationViews.GetUntouchedSubgraphs(); BOOST_TEST(untouchedSubgraphs.size() == 2); + // Sort into a consistent order + std::sort(untouchedSubgraphs.begin(), untouchedSubgraphs.end(), [](auto s1, auto s2) { + return strcmp(s1.GetLayers().front()->GetName(), s2.GetLayers().front()->GetName()) < 0; + }); std::vector<ExpectedSubgraphSize> expectedUntouchedSubgraphSizes{ { 1, 1, 1 }, { 1, 1, 1 } }; @@ -1146,7 +1163,7 @@ void PartiallyOptimizableSubgraphTestImpl2() Graph graph; LayerNameToLayerMap layersInGraph; - // Create a fully optimizable subgraph + // Create a partially optimizable subgraph SubgraphViewSelector::SubgraphViewPtr subgraphPtr = BuildPartiallyOptimizableSubgraph2(graph, layersInGraph); BOOST_TEST((subgraphPtr != nullptr)); @@ -1185,44 +1202,32 @@ void PartiallyOptimizableSubgraphTestImpl2() // ----------------------- const OptimizationViews::Substitutions& substitutions = optimizationViews.GetSubstitutions(); - BOOST_TEST(substitutions.size() == 2); - - std::vector<ExpectedSubgraphSize> expectedSubstitutableSubgraphSizes{ { 1, 1, 1 }, - { 2, 1, 2 } }; - std::vector<ExpectedSubgraphSize> expectedReplacementSubgraphSizes{ { 1, 1, 1 }, - { 2, 1, 1 } }; + BOOST_TEST(substitutions.size() == 1); - SubgraphView::InputSlots expectedSubstitutableSubgraph2InputSlots = - ConvertReferenceTypeToPointerType(layersInGraph.at("conv3 layer")->GetInputSlots()); - expectedSubstitutableSubgraph2InputSlots.push_back( - ConvertReferenceTypeToPointerType(layersInGraph.at("add layer")->GetInputSlot(0))); + ExpectedSubgraphSize expectedSubstitutableSubgraphSizes{ 2, 1, 3 }; + ExpectedSubgraphSize expectedReplacementSubgraphSizes{ 2, 1, 1 }; - std::vector<SubgraphView::InputSlots> expectedSubstitutableInputSlots - { - ConvertReferenceTypeToPointerType(layersInGraph.at("conv1 layer")->GetInputSlots()), - expectedSubstitutableSubgraph2InputSlots + SubgraphView::InputSlots expectedSubstitutableInputSlots = { + ConvertReferenceTypeToPointerType(layersInGraph.at("conv1 layer")->GetInputSlots()[0]), + ConvertReferenceTypeToPointerType(layersInGraph.at("conv3 layer")->GetInputSlots()[0]) }; - std::vector<SubgraphView::OutputSlots> expectedSubstitutableOutputSlots + SubgraphView::OutputSlots expectedSubstitutableOutputSlots = { - ConvertReferenceTypeToPointerType(layersInGraph.at("conv1 layer")->GetOutputSlots()), - ConvertReferenceTypeToPointerType(layersInGraph.at("add layer")->GetOutputSlots()) + ConvertReferenceTypeToPointerType(layersInGraph.at("add layer")->GetOutputSlots()[0]) }; - std::vector<SubgraphView::Layers> expectedSubstitutableLayers + SubgraphView::Layers expectedSubstitutableLayers { - { layersInGraph.at("conv1 layer") }, - { layersInGraph.at("conv3 layer"), - layersInGraph.at("add layer") } + layersInGraph.at("conv1 layer"), + layersInGraph.at("conv3 layer"), + layersInGraph.at("add layer") }; - for (size_t substitutionIndex = 0; substitutionIndex < substitutions.size(); substitutionIndex++) - { - CheckSubstitution(substitutions.at(substitutionIndex), - expectedSubstitutableSubgraphSizes.at(substitutionIndex), - expectedReplacementSubgraphSizes.at(substitutionIndex), - expectedSubstitutableInputSlots.at(substitutionIndex), - expectedSubstitutableOutputSlots.at(substitutionIndex), - expectedSubstitutableLayers.at(substitutionIndex)); - } + CheckSubstitution(substitutions[0], + expectedSubstitutableSubgraphSizes, + expectedReplacementSubgraphSizes, + expectedSubstitutableInputSlots, + expectedSubstitutableOutputSlots, + expectedSubstitutableLayers); // -------------------------- // Check the failed subgraphs |