18 #include <fmt/format.h>
20 #include <unordered_map>
// Fragment of a Graph copy-constructor (signature not visible in this extract).
// NOTE(review): the leading numbers on each line are original-file line numbers
// fused in by a broken extraction; many interior lines are missing. Code is left
// byte-identical — do not attempt to compile this chunk as-is.
// Copies bookkeeping state, then (presumably) deep-clones every layer and
// re-creates the connections between the clones — TODO confirm against the
// full source.
28 : m_LayersInOrder(other.m_LayersInOrder)
29 , m_AllowExpandedDims(other.m_AllowExpandedDims)
30 , m_ShapeInferenceMethod(other.m_ShapeInferenceMethod)
31 , m_Profiler(other.m_Profiler)
// Maps each layer of the source graph to its cloned counterpart.
33 std::unordered_map<const Layer*, Layer*> otherToClonedMap;
// First pass: visit every source layer and record source -> clone.
35 for (
auto&& otherLayer : other.m_Layers)
38 otherToClonedMap.emplace(otherLayer, layer);
// Second pass: replicate connections using the mapping built above.
42 for (
auto&& otherLayer : other.m_Layers)
44 Layer*
const thisLayer = otherToClonedMap[otherLayer];
47 for (
auto&& otherOutputSlot : otherLayer->GetOutputSlots())
49 for (
auto&& otherInputSlot : otherOutputSlot.GetConnections())
// Resolve the connection's target layer in the cloned graph.
51 const Layer& otherTgtLayer = otherInputSlot->GetOwningLayer();
52 Layer*
const thisTgtLayer = otherToClonedMap[&otherTgtLayer];
// Re-establish the connection and carry over the tensor metadata.
55 outputSlot->Connect(inputSlot);
57 outputSlot->SetTensorInfo(otherOutputSlot.GetTensorInfo());
// Fragment of a debug/print routine: for each layer it logs the backend id,
// slot counts, and the shape of every input and output tensor.
// NOTE(review): extraction is incomplete (line numbers jump); statements are
// split mid-expression. Left byte-identical.
75 auto numInputSlots = it->GetNumInputSlots();
76 auto numOutputSlots = it->GetNumOutputSlots();
// Streams "<name?>:<backend> has N input slots and M output slots." —
// the left-hand stream object is on a missing line.
79 <<
":" << it->GetBackendId().Get()
80 <<
" has " << numInputSlots <<
" input slots"
81 <<
" and " << numOutputSlots <<
" output slots.";
// Dump the shape of each connected input tensor as "[ d0,d1,... ]".
83 for (
auto i : it->GetInputSlots())
85 std::ostringstream message;
86 auto inputTensorShape = i.GetConnectedOutputSlot()->GetTensorInfo().GetShape();
87 unsigned int numDims = inputTensorShape.GetNumDimensions();
89 message <<
"The input slot has shape [ ";
90 for (
unsigned int dim=0; dim < numDims; dim++)
92 message << inputTensorShape[dim] <<
",";
// Same dump for each output slot's tensor shape.
98 for (
unsigned int i = 0; i < it->GetNumOutputSlots(); i++)
101 std::ostringstream message;
102 auto outputTensorShape = layer->
GetOutputSlots()[i].GetTensorInfo().GetShape();
103 unsigned int numDims = outputTensorShape.GetNumDimensions();
105 message <<
"The output slot has shape [ ";
106 for (
unsigned int dim=0; dim < numDims; dim++)
108 message << outputTensorShape[dim] <<
",";
// Fragment of Graphviz/DOT serialization: emits a DotGraph named "Optimized",
// then (from what is visible) one node per layer and one DotEdge per input
// connection. Most of the body is missing from this extract; left byte-identical.
123 DotGraph graph(stream,
"Optimized");
// Per-layer pass — serializes layer parameters (presumably into node labels;
// confirm against the full source).
142 for (
auto&& layer : m_Layers)
149 layer->SerializeLayerParameters(extractParams);
// Per-connection pass — one edge per input slot of every layer.
153 for (
auto&& layer : m_Layers)
157 for (
unsigned int i=0;i<layer->GetNumInputSlots(); i++)
161 DotEdge edge(stream, fromId, toId);
166 std::stringstream ss;
// Fragment of a buffer-allocation routine. Visible logic: resolve each slot's
// tensor handle to its root ancestor, collect "preallocated" handles in a set,
// then reference-count the remaining handles by connection count, decrementing
// per consumed input and erasing entries that reach zero (presumably freeing
// the buffer at that point — the action on zero is on missing lines).
// NOTE(review): extraction is incomplete; left byte-identical.
187 std::unordered_set<const ITensorHandle*> preallocatedTensors;
188 std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;
// Walks a (sub-)tensor handle up its parent chain to the owning root handle.
192 auto TraceSubTensorHandleAncestry = [](
ITensorHandle*
const subTensorHandle)
195 while (ancestor && ancestor->
GetParent())
// Membership test against the preallocated set (guards double-handling).
205 return tensorHandle && preallocatedTensors.find(tensorHandle) != preallocatedTensors.end();
// Pass 1: register every not-yet-seen root handle as preallocated.
210 for (
auto&& layer : m_Layers)
214 for (
auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
216 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
218 if (tensorHandle && !IsPreallocated(tensorHandle))
221 preallocatedTensors.insert(tensorHandle);
// Pass 2: build per-handle reference counts from output connection counts,
// then decrement as each consuming input slot is visited.
228 for (
auto&& layer : m_Layers)
232 for (
auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
234 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
236 if (tensorHandle && !IsPreallocated(tensorHandle))
238 unsigned int numConnections = slot->GetNumConnections();
239 if (handleReferenceCounts.find(tensorHandle) == handleReferenceCounts.end())
241 handleReferenceCounts[tensorHandle] = numConnections;
// A zero count here means the output has no consumers — handling is on
// missing lines; TODO confirm.
243 if (handleReferenceCounts[tensorHandle] == 0u)
251 handleReferenceCounts[tensorHandle] += numConnections;
// Consume one reference per connected input slot.
258 for (
auto&& slot = layer->BeginInputSlots(); slot != layer->EndInputSlots(); ++slot)
261 slot->GetConnectedOutputSlot()->GetOutputHandler().GetData());
263 if (tensorHandle && !IsPreallocated(tensorHandle))
265 --handleReferenceCounts[tensorHandle];
// Last consumer seen: drop the bookkeeping entry.
267 if (handleReferenceCounts[tensorHandle] == 0u)
271 handleReferenceCounts.erase(tensorHandle);
// Fragment of a layer-ordering routine: if the list is flagged out of order,
// (re)compute per-layer priorities and sort the layer list by them, then mark
// the graph ordered. Intermediate lines are missing; left byte-identical.
282 if (!m_LayersInOrder)
// Visit every layer first — presumably to reset/compute GetPriority();
// the loop body is on missing lines, TODO confirm.
285 for (
auto&& it : m_Layers)
// Ascending priority puts higher-priority (earlier) layers first in the list.
290 auto compareLayerPriority = [](
const LayerList::value_type& layerA,
const LayerList::value_type& layerB)
292 return layerA->GetPriority() < layerB->GetPriority();
// std::list::sort — stable, no iterator invalidation.
295 m_Layers.sort(compareLayerPriority);
297 m_LayersInOrder =
true;
// Fragment of a compatibility-layer insertion pass: for every edge whose
// memory strategy requires it, a MemCopyLayer or MemImportLayer is spliced in
// between source output slot and destination input slot, and the edge strategy
// on the (re-indexed) connection is updated afterwards.
// NOTE(review): extraction is incomplete; left byte-identical.
// Predicate: does this layer type ever need a compatibility layer inserted?
308 auto MayNeedCompatibilityLayer = [](
const Layer& layer)
// Predicate: is this edge strategy one of the copy/import strategies?
317 auto IsCompatibilityStrategy = [](
EdgeStrategy strategy)
323 ForEachLayer([
this, &backends, &registry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](
Layer* srcLayer)
// Early-out for layer kinds that never need compatibility handling.
327 if (!MayNeedCompatibilityLayer(*srcLayer))
333 const std::vector<OutputSlot>& srcOutputSlots = srcLayer->
GetOutputSlots();
334 for (
unsigned int srcOutputIndex = 0; srcOutputIndex < srcOutputSlots.size(); srcOutputIndex++)
// Connections and their edge strategies are parallel arrays by index.
337 const std::vector<InputSlot*> srcConnections = srcOutputSlot.
GetConnections();
338 const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.
GetEdgeStrategies();
339 for (
unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
341 InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
344 EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex];
// Assertion/throw message for an Undefined strategy (condition on missing line).
346 "Undefined memory strategy found while adding copy layers for compatibility");
349 if (MayNeedCompatibilityLayer(dstLayer) &&
350 IsCompatibilityStrategy(strategy))
// Name encodes "[ src (slot) -> dst (slot) ]" for debuggability.
355 const std::string compLayerName = fmt::format(
"[ {} ({}) -> {} ({}) ]",
360 Layer* compLayer =
nullptr;
// Strategy selects which compatibility layer type gets inserted
// before the destination input slot (selection branch is on missing lines).
363 compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str());
368 compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str());
// Pick a tensor handle factory for the new layer from the destination
// backend's preferences, when that backend supports the allocator API.
374 auto backendIt = backends.find(dstLayer.
GetBackendId());
375 if (backendIt != backends.end() &&
377 backendIt->second->SupportsTensorAllocatorAPI())
379 auto backend = backendIt->second.get();
380 auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences();
383 for (
auto preference : tensorHandleFactoryIds)
385 auto factory = registry.
GetFactory(preference);
389 auto srcFactory = registry.
GetFactory(srcPref);
// Import is viable only when the factories' import/export flags overlap.
393 bool canExportImport =
394 (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
396 if (factory->SupportsMapUnmap() || canExportImport)
// Inserting the layer re-orders connections, so re-locate this
// connection's index before updating its edge strategy.
420 const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.
GetConnections();
421 auto newSrcConnectionIndex = std::distance(newSourceConnections.begin(),
422 std::find(newSourceConnections.begin(),
423 newSourceConnections.end(),
427 srcOutputSlot.
SetEdgeStrategy(armnn::numeric_cast<unsigned int>(newSrcConnectionIndex),
// Fragment of a subgraph-substitution routine: substitute layers not already
// owned by this graph are reparented to the end of m_Layers (invalidating the
// sort order), then old connections are rewired and the replaced layers erased.
// NOTE(review): extraction is incomplete; left byte-identical.
// Only reparent layers that are not already members of m_Layers.
451 if (std::find(std::begin(m_Layers),
453 iConnectableLayer) == std::end(m_Layers))
455 auto layer = PolymorphicDowncast<Layer*>(iConnectableLayer);
456 layer->Reparent(*
this, m_Layers.end());
// Appending at the end breaks topological order; flag for a later re-sort.
457 m_LayersInOrder =
false;
// Rewire boundary connections, then remove the substituted-out layers.
461 ReplaceSubgraphConnections(subgraph, substituteSubgraph);
462 EraseSubgraphLayers(subgraph);
// Fragment of boundary rewiring for subgraph substitution: after validating the
// substitute layers belong to this graph and the boundary slot counts match,
// each input connection is moved from the old subgraph to the substitute, and
// each output slot's connections are moved wholesale.
// NOTE(review): extraction is incomplete; left byte-identical.
469 "New sub-graph used for substitution must not be empty");
// Sanity check: every substitute layer must already be a member of this graph.
472 std::for_each(substituteSubgraphLayers.begin(), substituteSubgraphLayers.end(), [&](
IConnectableLayer* layer)
475 layer = PolymorphicDowncast<Layer*>(layer);
476 ARMNN_ASSERT_MSG(std::find(m_Layers.begin(), m_Layers.end(), layer) != m_Layers.end(),
477 "Substitute layer is not a member of graph");
// Old and new subgraphs must expose identical boundary slot counts so the
// index-by-index rewiring below is well-defined.
483 unsigned int subgraphNumInputSlots = armnn::numeric_cast<unsigned int>(subgraphInputSlots.size());
484 unsigned int subgraphNumOutputSlots = armnn::numeric_cast<unsigned int>(subgraphOutputSlots.size());
489 ARMNN_ASSERT(subgraphNumInputSlots == substituteSubgraphInputSlots.size());
490 ARMNN_ASSERT(subgraphNumOutputSlots == substituteSubgraphOutputSlots.size());
// Inputs: detach each external producer from the old subgraph's input slot
// and attach it to the substitute's corresponding input slot.
495 for (
unsigned int inputSlotIdx = 0; inputSlotIdx < subgraphNumInputSlots; ++inputSlotIdx)
497 IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
507 connectedOutputSlot->
Disconnect(*subgraphInputSlot);
509 IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
511 connectedOutputSlot->
Connect(*substituteInputSlot);
// Outputs: move all downstream connections in one call per slot.
516 for(
unsigned int outputSlotIdx = 0; outputSlotIdx < subgraphNumOutputSlots; ++outputSlotIdx)
518 auto subgraphOutputSlot =
519 PolymorphicDowncast<OutputSlot*>(subgraphOutputSlots.at(outputSlotIdx));
522 auto substituteOutputSlot =
523 PolymorphicDowncast<OutputSlot*>(substituteSubgraphOutputSlots.at(outputSlotIdx));
526 subgraphOutputSlot->MoveAllConnections(*substituteOutputSlot);
// Removes the given subgraph's layers from this graph. Only the signature and
// the iteration header survive this extraction; the per-layer erase action is
// on missing lines. Left byte-identical.
530 void Graph::EraseSubgraphLayers(SubgraphView &subgraph)
533 for (
auto iConnectableLayer : subgraph.GetIConnectableLayers())
// Downcast from the public interface type to the internal Layer.
535 auto layer = PolymorphicDowncast<Layer*>(iConnectableLayer);
// Fragment of a tensor-info validation pass over a layer: every output slot
// must have its TensorInfo set, every input slot must be connected (otherwise
// a detailed error is constructed), and finally the layer validates its own
// shapes from its inputs. Extraction is incomplete; left byte-identical.
// Check each output slot carries a TensorInfo.
551 for (
auto&& output: layer->GetOutputSlots())
553 if (!output.IsTensorInfoSet())
555 std::ostringstream message;
556 message <<
"Output slot TensorInfo not set on "
// Check each input slot is fed by a connected, info-bearing output slot.
572 for (
auto&& input : layer->GetInputSlots())
574 const IOutputSlot* source = input.GetConnectedOutputSlot();
// Unconnected input: delegate building (and throwing) the error message.
579 ConstructErrorMessageForUnconnectedInputs(layer, input.GetSlotIndex());
584 std::ostringstream message;
585 message <<
"Output slot TensorInfo not set on "
588 << std::quoted(layer->GetName());
// All prerequisites hold — let the layer check shape consistency itself.
595 layer->ValidateTensorShapesFromInputs();
// Builds and throws a LayerValidationException describing which input slot(s)
// of `layer` are unconnected. Special-cases layers expected to have weights
// (slot 1) and bias (slot 2) inputs so the message names what is missing.
// NOTE(review): extraction is incomplete (conditions guarding several branches
// are on missing lines); left byte-identical.
607 void Graph::ConstructErrorMessageForUnconnectedInputs(
Layer*
const layer,
608 unsigned int slotIndex)
610 std::ostringstream message;
611 bool noWeightsAndBias =
false;
618 message << std::endl;
// Neither weights nor bias connected: report both slots (1 & 2) at once.
624 if (biasSource == NULL)
626 message <<
"Weights and bias layers not set." << std::endl;
627 noWeightsAndBias =
true;
// Otherwise report whichever of weights/bias is individually missing.
632 if (!noWeightsAndBias)
636 message <<
"Weights layer not set." << std::endl;
640 message <<
"Bias layer not set." << std::endl;
// Slot list in the final message: "1 & 2" for the combined case, else the
// single offending slot index.
645 std::string slotString = noWeightsAndBias ?
"1 & 2" : std::to_string(slotIndex);
646 message <<
"Input slot(s) "
650 <<
" not connected to an output slot. " << std::endl
652 << std::quoted(layer->
GetName());
653 throw LayerValidationException(message.str());
// Orphan fragment (original line 663): invalidates the graph's sort-order flag,
// presumably after a structural mutation — the enclosing function's signature
// is on missing lines; TODO confirm against the full source. Left byte-identical.
663 m_LayersInOrder =
false;