18 #include <fmt/format.h>
20 #include <unordered_map>
// -- Fragment: Graph copy constructor (signature and several body lines are
// missing from this chunk) --
// Member-init list copies the ordering flag, expanded-dims flag,
// shape-inference method and profiler from `other`.
28 : m_LayersInOrder(other.m_LayersInOrder)
29 , m_AllowExpandedDims(other.m_AllowExpandedDims)
30 , m_ShapeInferenceMethod(other.m_ShapeInferenceMethod)
31 , m_Profiler(other.m_Profiler)
// Maps each layer of `other` to its clone in this graph so the second pass
// can translate connection endpoints.
33 std::unordered_map<const Layer*, Layer*> otherToClonedMap;
// Pass 1: clone every layer and record the original -> clone mapping.
// NOTE(review): the creation of `layer` itself is in lines missing here.
35 for (
auto&& otherLayer : other.m_Layers)
38 otherToClonedMap.emplace(otherLayer, layer);
// Pass 2: re-create, between the clones, every connection present in `other`.
42 for (
auto&& otherLayer : other.m_Layers)
44 Layer*
const thisLayer = otherToClonedMap[otherLayer];
47 for (
auto&& otherOutputSlot : otherLayer->GetOutputSlots())
49 for (
auto&& otherInputSlot : otherOutputSlot.GetConnections())
// Resolve the destination layer of this connection and look up its clone.
51 const Layer& otherTgtLayer = otherInputSlot->GetOwningLayer();
52 Layer*
const thisTgtLayer = otherToClonedMap[&otherTgtLayer];
// Preserve any per-connection TensorInfo override; the override-copying
// code is in lines missing from this fragment.
56 if (otherInputSlot->IsTensorInfoOverridden())
60 outputSlot->Connect(inputSlot);
// Copy the slot's TensorInfo so cloned outputs carry the same shape/type.
62 outputSlot->SetTensorInfo(otherOutputSlot.GetTensorInfo());
// -- Fragment: per-layer diagnostic printing (enclosing signature missing) --
// For each layer: report slot counts, then dump the shape of every input
// and output tensor.
80 auto numInputSlots = it->GetNumInputSlots();
81 auto numOutputSlots = it->GetNumOutputSlots();
// Builds "<...>:<backend> has N input slots and M output slots."; the
// stream being written to is set up in lines missing from this fragment.
84 <<
":" << it->GetBackendId().Get()
85 <<
" has " << numInputSlots <<
" input slots"
86 <<
" and " << numOutputSlots <<
" output slots.";
// Dump each input slot's tensor shape as "[ d0,d1,...".
88 for (
auto i : it->GetInputSlots())
90 std::ostringstream message;
91 auto inputTensorShape = i.GetConnectedOutputSlot()->GetTensorInfo().GetShape();
92 unsigned int numDims = inputTensorShape.GetNumDimensions();
94 message <<
"The input slot has shape [ ";
95 for (
unsigned int dim=0; dim < numDims; dim++)
97 message << inputTensorShape[dim] <<
",";
// Dump each output slot's tensor shape the same way.
103 for (
unsigned int i = 0; i < it->GetNumOutputSlots(); i++)
106 std::ostringstream message;
107 auto outputTensorShape = layer->
GetOutputSlots()[i].GetTensorInfo().GetShape();
108 unsigned int numDims = outputTensorShape.GetNumDimensions();
110 message <<
"The output slot has shape [ ";
111 for (
unsigned int dim=0; dim < numDims; dim++)
113 message << outputTensorShape[dim] <<
",";
// -- Fragment: DOT-format graph serialization (enclosing signature missing) --
// Emits a DOT graph named "Optimized" into `stream`.
128 DotGraph graph(stream,
"Optimized");
// One DOT node per layer, with its parameters serialized via
// SerializeLayerParameters (node construction is in missing lines).
147 for (
auto&& layer : m_Layers)
154 layer->SerializeLayerParameters(extractParams);
// One DOT edge per input-slot connection; fromId/toId are computed in
// lines missing from this fragment.
158 for (
auto&& layer : m_Layers)
162 for (
unsigned int i=0;i<layer->GetNumInputSlots(); i++)
166 DotEdge edge(stream, fromId, toId);
171 std::stringstream ss;
// -- Fragment: dynamic tensor-buffer allocation pass (signature missing) --
// Tracks which tensor handles are pre-allocated and reference-counts the
// rest, so each handle can be released once its last consumer is done.
192 std::unordered_set<const ITensorHandle*> preallocatedTensors;
193 std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;
// Walks up the parent chain so sub-tensors are accounted against their
// root (owning) tensor handle.
197 auto TraceSubTensorHandleAncestry = [](
ITensorHandle*
const subTensorHandle)
200 while (ancestor && ancestor->
GetParent())
// Predicate: handle is already registered as pre-allocated.
210 return tensorHandle && preallocatedTensors.find(tensorHandle) != preallocatedTensors.end();
// Pass 1: register every (root) output tensor handle as pre-allocated.
215 for (
auto&& layer : m_Layers)
219 for (
auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
221 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
223 if (tensorHandle && !IsPreallocated(tensorHandle))
226 preallocatedTensors.insert(tensorHandle);
// Pass 2: count references per handle (one per outgoing connection), then
// decrement as input slots consume them; release when the count hits zero.
233 for (
auto&& layer : m_Layers)
237 for (
auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
239 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
241 if (tensorHandle && !IsPreallocated(tensorHandle))
243 unsigned int numConnections = slot->GetNumConnections();
244 if (handleReferenceCounts.find(tensorHandle) == handleReferenceCounts.end())
246 handleReferenceCounts[tensorHandle] = numConnections;
// A zero-connection output is unused; its handling is in missing lines.
248 if (handleReferenceCounts[tensorHandle] == 0u)
256 handleReferenceCounts[tensorHandle] += numConnections;
// Consumer side: each connected input slot releases one reference.
263 for (
auto&& slot = layer->BeginInputSlots(); slot != layer->EndInputSlots(); ++slot)
266 slot->GetConnectedOutputSlot()->GetOutputHandler().GetData());
268 if (tensorHandle && !IsPreallocated(tensorHandle))
270 --handleReferenceCounts[tensorHandle];
// Last consumer done: drop the handle from the ref-count map (the actual
// release call is in lines missing from this fragment).
272 if (handleReferenceCounts[tensorHandle] == 0u)
276 handleReferenceCounts.erase(tensorHandle);
// -- Fragment: topological sort of the layer list (signature missing) --
// Re-sorts only when the cached order has been invalidated.
287 if (!m_LayersInOrder)
290 for (
auto&& it : m_Layers)
// Ordering criterion: per-layer priority (presumably computed so that
// producers sort before consumers — TODO confirm against Layer::GetPriority).
295 auto compareLayerPriority = [](
const LayerList::value_type& layerA,
const LayerList::value_type& layerB)
297 return layerA->GetPriority() < layerB->GetPriority();
300 m_Layers.sort(compareLayerPriority);
// Mark the cached order as valid again.
302 m_LayersInOrder =
true;
// -- Fragment: compatibility-layer insertion pass (signature missing) --
// Inserts MemCopy/MemImport layers on edges whose endpoints require an
// explicit memory transfer, per the edge strategies decided earlier.
// FIX: line "this, &backends, ®istry, ..." contained mojibake — the HTML
// entity "&reg;" had swallowed "&reg" of "&registry", corrupting the lambda
// capture list. Restored to "&registry"; every other token is unchanged.
313 auto MayNeedCompatibilityLayer = [](
const Layer& layer)
322 auto IsCompatibilityStrategy = [](
EdgeStrategy strategy)
328 ForEachLayer([
this, &backends, &registry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](
Layer* srcLayer)
332 if (!MayNeedCompatibilityLayer(*srcLayer))
// Examine every outgoing connection of the source layer together with the
// edge strategy recorded for it.
338 const std::vector<OutputSlot>& srcOutputSlots = srcLayer->
GetOutputSlots();
339 for (
unsigned int srcOutputIndex = 0; srcOutputIndex < srcOutputSlots.size(); srcOutputIndex++)
342 const std::vector<InputSlot*> srcConnections = srcOutputSlot.
GetConnections();
343 const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.
GetEdgeStrategies();
344 for (
unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
346 InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
349 EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex];
351 "Undefined memory strategy found while adding copy layers for compatibility");
// Only act on edges flagged with a compatibility strategy between two
// compatibility-relevant layers.
354 if (MayNeedCompatibilityLayer(dstLayer) &&
355 IsCompatibilityStrategy(strategy))
// Layer name encodes "[ src (slot) -> dst (slot) ]" for debugging.
360 const std::string compLayerName = fmt::format(
"[ {} ({}) -> {} ({}) ]",
365 Layer* compLayer =
nullptr;
// The strategy selects which intermediate layer type is spliced in
// (selection branches are in lines missing from this fragment).
368 compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str());
373 compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str());
// Choose a tensor-handle factory for the new layer from the destination
// backend's preferences.
379 auto backendIt = backends.find(dstLayer.
GetBackendId());
380 if (backendIt != backends.end() &&
382 backendIt->second->SupportsTensorAllocatorAPI())
384 auto backend = backendIt->second.get();
385 auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences();
388 for (
auto preference : tensorHandleFactoryIds)
390 auto factory = registry.
GetFactory(preference);
394 auto srcFactory = registry.
GetFactory(srcPref);
// Export/import is viable only if the factories share at least one flag.
398 bool canExportImport =
399 (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
401 if (factory->SupportsMapUnmap() || canExportImport)
// Inserting the new layer changed this output slot's connection list, so
// re-locate the edge before updating its strategy.
425 const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.
GetConnections();
426 auto newSrcConnectionIndex = std::distance(newSourceConnections.begin(),
427 std::find(newSourceConnections.begin(),
428 newSourceConnections.end(),
432 srcOutputSlot.
SetEdgeStrategy(armnn::numeric_cast<unsigned int>(newSrcConnectionIndex),
// -- Fragment: subgraph substitution (enclosing signature missing) --
// Any substitute layer not yet owned by this graph is reparented into it;
// reparenting invalidates the cached topological order.
456 if (std::find(std::begin(m_Layers),
458 iConnectableLayer) == std::end(m_Layers))
460 auto layer = PolymorphicDowncast<Layer*>(iConnectableLayer);
461 layer->Reparent(*
this, m_Layers.end())
462 m_LayersInOrder =
false;
// Rewire the boundary connections, then erase the replaced layers.
466 ReplaceSubgraphConnections(subgraph, substituteSubgraph);
467 EraseSubgraphLayers(subgraph);
474 "New sub-graph used for substitution must not be empty");
477 std::for_each(substituteSubgraphLayers.begin(), substituteSubgraphLayers.end(), [&](
IConnectableLayer* layer)
480 layer = PolymorphicDowncast<Layer*>(layer);
481 ARMNN_ASSERT_MSG(std::find(m_Layers.begin(), m_Layers.end(), layer) != m_Layers.end(),
482 "Substitute layer is not a member of graph");
488 unsigned int subgraphNumInputSlots = armnn::numeric_cast<unsigned int>(subgraphInputSlots.size());
489 unsigned int subgraphNumOutputSlots = armnn::numeric_cast<unsigned int>(subgraphOutputSlots.size());
494 ARMNN_ASSERT(subgraphNumInputSlots == substituteSubgraphInputSlots.size());
495 ARMNN_ASSERT(subgraphNumOutputSlots == substituteSubgraphOutputSlots.size());
500 for (
unsigned int inputSlotIdx = 0; inputSlotIdx < subgraphNumInputSlots; ++inputSlotIdx)
502 IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
511 InputSlot* inputSlot = PolymorphicDowncast<InputSlot*>(subgraphInputSlot);
515 connectedOutputSlot->
Disconnect(*subgraphInputSlot);
517 IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
519 connectedOutputSlot->
Connect(*substituteInputSlot);
524 InputSlot* newInputSlot = PolymorphicDowncast<InputSlot*>(substituteInputSlot);
531 for(
unsigned int outputSlotIdx = 0; outputSlotIdx < subgraphNumOutputSlots; ++outputSlotIdx)
533 auto subgraphOutputSlot =
534 PolymorphicDowncast<OutputSlot*>(subgraphOutputSlots.at(outputSlotIdx));
537 auto substituteOutputSlot =
538 PolymorphicDowncast<OutputSlot*>(substituteSubgraphOutputSlots.at(outputSlotIdx));
541 subgraphOutputSlot->MoveAllConnections(*substituteOutputSlot);
// Removes every layer of the given subgraph from this graph.
// NOTE(review): the actual erase/delete call on `layer` is in lines missing
// from this fragment.
545 void Graph::EraseSubgraphLayers(SubgraphView &subgraph)
548 for (
auto iConnectableLayer : subgraph.GetIConnectableLayers())
550 auto layer = PolymorphicDowncast<Layer*>(iConnectableLayer);
// -- Fragment: tensor-info validation/inference pass (signature missing) --
// Every output slot must have its TensorInfo set before validation.
566 for (
auto&& output: layer->GetOutputSlots())
568 if (!output.IsTensorInfoSet())
570 std::ostringstream message;
571 message <<
"Output slot TensorInfo not set on "
// Every input slot must be connected; an unconnected slot triggers the
// detailed error builder (which throws).
587 for (
auto&& input : layer->GetInputSlots())
589 const IOutputSlot* source = input.GetConnectedOutputSlot();
594 ConstructErrorMessageForUnconnectedInputs(layer, input.GetSlotIndex());
// A connected source without TensorInfo is also an error; the message
// quotes the offending layer's name.
599 std::ostringstream message;
600 message <<
"Output slot TensorInfo not set on "
603 << std::quoted(layer->GetName());
// Finally let the layer validate/infer its own output shapes.
610 layer->ValidateTensorShapesFromInputs();
// Builds and throws a LayerValidationException describing which input
// slot(s) of `layer` are unconnected. Special-cases layers whose weights
// and bias inputs are both missing, reporting slots "1 & 2" together.
// @param layer      Layer with at least one unconnected input slot.
// @param slotIndex  Index of the unconnected slot being reported.
// @throws LayerValidationException always (this function never returns).
// FIX: `biasSource == NULL` used the C macro NULL for a pointer comparison;
// replaced with nullptr to match the file's own convention (see the
// `Layer* compLayer = nullptr;` usage elsewhere in this file). No other
// token changed.
622 void Graph::ConstructErrorMessageForUnconnectedInputs(
Layer*
const layer,
623 unsigned int slotIndex)
625 std::ostringstream message;
626 bool noWeightsAndBias =
false;
633 message << std::endl;
// Both weights and bias sources absent -> collapse into one combined
// message instead of two separate ones.
639 if (biasSource == nullptr)
641 message <<
"Weights and bias layers not set." << std::endl;
642 noWeightsAndBias =
true;
// Otherwise report whichever of weights/bias is individually missing
// (the surrounding condition checks are in lines missing here).
647 if (!noWeightsAndBias)
651 message <<
"Weights layer not set." << std::endl;
655 message <<
"Bias layer not set." << std::endl;
// Slot list is "1 & 2" for the combined case, else the single index.
660 std::string slotString = noWeightsAndBias ?
"1 & 2" : std::to_string(slotIndex);
661 message <<
"Input slot(s) "
665 <<
" not connected to an output slot. " << std::endl
667 << std::quoted(layer->
GetName());
668 throw LayerValidationException(message.str());
// -- Fragment: invalidates the cached topological order (the enclosing
// function is not visible in this chunk) --
678 m_LayersInOrder =
false;