// NOTE(review): this file appears to be a lossily extracted chunk of Arm NN's
// Graph.cpp. The leading decimal tokens (18, 20, 28, ...) look like the
// original line numbers fused into the text by the extractor, and many
// interior lines are missing. The comments below are a reading aid for the
// surviving fragments — confirm everything against the upstream file.
//
// Fragment: Graph copy constructor. Clones every layer of `other` into this
// graph, then replays the slot connections and copies each output slot's
// TensorInfo onto the clone.
18 #include <fmt/format.h> 20 #include <unordered_map> 28 : m_LayersInOrder(other.m_LayersInOrder)
29 , m_Profiler(other.m_Profiler)
// Maps each layer of the source graph to its freshly cloned counterpart.
31 std::unordered_map<const Layer*, Layer*> otherToClonedMap;
// Pass 1: clone every layer. (The line that creates `layer` is missing from
// this extraction — presumably otherLayer->Clone(*this); confirm upstream.)
33 for (
auto&& otherLayer : other.m_Layers)
36 otherToClonedMap.emplace(otherLayer, layer);
// Pass 2: re-create every connection between the cloned layers.
40 for (
auto&& otherLayer : other.m_Layers)
42 Layer*
const thisLayer = otherToClonedMap[otherLayer];
// Walk each output slot of the source layer and each input slot it feeds.
45 for (
auto&& otherOutputSlot : otherLayer->GetOutputSlots())
47 for (
auto&& otherInputSlot : otherOutputSlot.GetConnections())
49 const Layer& otherTgtLayer = otherInputSlot->GetOwningLayer();
// Resolve the connection's destination layer to its clone.
50 Layer*
const thisTgtLayer = otherToClonedMap[&otherTgtLayer];
53 outputSlot->Connect(inputSlot);
// Propagate the tensor info from the source slot onto the cloned slot.
55 outputSlot->SetTensorInfo(otherOutputSlot.GetTensorInfo());
// Fragment: per-layer diagnostic printing (likely Graph::Print). For each
// layer it logs the slot counts, then the shape of every input and output
// tensor. Interior lines (the ARMNN_LOG calls and loop braces) are missing
// from this extraction.
73 auto numInputSlots = it->GetNumInputSlots();
74 auto numOutputSlots = it->GetNumOutputSlots();
// Summary line: "<name>:<backend> has N input slots and M output slots."
77 <<
":" << it->GetBackendId().Get()
78 <<
" has " << numInputSlots <<
" input slots" 79 <<
" and " << numOutputSlots <<
" output slots.";
// Log the shape of each input slot's connected tensor.
81 for (
auto i : it->GetInputSlots())
83 std::ostringstream message;
84 auto inputTensorShape = i.GetConnectedOutputSlot()->GetTensorInfo().GetShape();
85 unsigned int numDims = inputTensorShape.GetNumDimensions();
87 message <<
"The input slot has shape [ ";
88 for (
unsigned int dim=0; dim < numDims; dim++)
90 message << inputTensorShape[dim] <<
",";
// Log the shape of each output slot's tensor.
96 for (
unsigned int i = 0; i < it->GetNumOutputSlots(); i++)
99 std::ostringstream message;
100 auto outputTensorShape = layer->
GetOutputSlots()[i].GetTensorInfo().GetShape();
101 unsigned int numDims = outputTensorShape.GetNumDimensions();
103 message <<
"The output slot has shape [ ";
104 for (
unsigned int dim=0; dim < numDims; dim++)
106 message << outputTensorShape[dim] <<
",";
// Fragment: Graph::SerializeToDot. Emits the graph in Graphviz DOT form:
// one node per layer (with its serialized parameters as attributes), then
// one edge per input-slot connection. Most of the body is missing here.
121 DotGraph graph(stream,
"Optimized");
// First pass over the layers: emit a DOT node per layer.
140 for (
auto&& layer : m_Layers)
// `extractParams` presumably stringifies layer parameters into node
// attributes — the lambda's definition is missing from this extraction.
147 layer->SerializeLayerParameters(extractParams);
// Second pass: emit a DOT edge for every connection.
151 for (
auto&& layer : m_Layers)
155 for (
unsigned int i=0;i<layer->GetNumInputSlots(); i++)
159 DotEdge edge(stream, fromId, toId);
// Buffer for the edge label (likely the tensor shape); usage lines missing.
164 std::stringstream ss;
// Fragment: Graph::AllocateDynamicBuffers. Walks the graph's tensor handles,
// skipping sub-tensors (which alias a parent's memory), and drives
// Manage/Allocate/Unmanage-style lifetime calls via reference counting.
// Pre-allocated (imported) tensors are excluded from dynamic allocation.
185 std::unordered_set<const ITensorHandle*> preallocatedTensors;
186 std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;
// Resolve a (possibly sub-)tensor handle to its root ancestor by walking
// GetParent() links; sub-tensors share their root's allocation.
190 auto TraceSubTensorHandleAncestry = [](
ITensorHandle*
const subTensorHandle)
193 while (ancestor && ancestor->
GetParent())
// IsPreallocated helper body: membership test in `preallocatedTensors`.
203 return tensorHandle && preallocatedTensors.find(tensorHandle) != preallocatedTensors.end();
// Pass 1: record every root output-tensor handle (the lines that decide
// which handles count as "preallocated" are missing from this extraction).
208 for (
auto&& layer : m_Layers)
212 for (
auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
214 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
216 if (tensorHandle && !IsPreallocated(tensorHandle))
219 preallocatedTensors.insert(tensorHandle);
// Pass 2: reference-count each handle by its number of outgoing connections,
// decrementing as consuming input slots are visited; a count of zero means
// the tensor's memory can be released.
226 for (
auto&& layer : m_Layers)
230 for (
auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
232 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
234 if (tensorHandle && !IsPreallocated(tensorHandle))
236 unsigned int numConnections = slot->GetNumConnections();
237 if (handleReferenceCounts.find(tensorHandle) == handleReferenceCounts.end())
239 handleReferenceCounts[tensorHandle] = numConnections;
// Zero connections: nothing will ever consume this tensor (handling missing).
241 if (handleReferenceCounts[tensorHandle] == 0u)
249 handleReferenceCounts[tensorHandle] += numConnections;
// Consume side: decrement the producer handle's count per visited input slot.
256 for (
auto&& slot = layer->BeginInputSlots(); slot != layer->EndInputSlots(); ++slot)
259 slot->GetConnectedOutputSlot()->GetOutputHandler().GetData());
261 if (tensorHandle && !IsPreallocated(tensorHandle))
263 --handleReferenceCounts[tensorHandle];
// Last consumer visited: drop the bookkeeping entry for this handle.
265 if (handleReferenceCounts[tensorHandle] == 0u)
269 handleReferenceCounts.erase(tensorHandle);
// Fragment: topological sorting (likely Graph::TopologicalSort). Sorting is
// skipped when m_LayersInOrder is already true; otherwise layers are ordered
// by their priority and the flag is set.
280 if (!m_LayersInOrder)
// Reset/compute each layer's priority (the loop body is missing here).
283 for (
auto&& it : m_Layers)
// Comparator: lower GetPriority() sorts earlier.
288 auto compareLayerPriority = [](
const LayerList::value_type& layerA,
const LayerList::value_type& layerB)
290 return layerA->GetPriority() < layerB->GetPriority();
// std::list::sort — stable, no iterator invalidation for list nodes.
293 m_Layers.sort(compareLayerPriority);
295 m_LayersInOrder =
true;
// Fragment: Graph::AddCompatibilityLayers. Inserts MemCopy/MemImport layers
// on edges whose endpoints sit on different backends and whose edge strategy
// requires an explicit copy or import. Many interior lines (strategy checks,
// factory-id assignment) are missing from this extraction.
// Predicate: does this layer's type ever require a compatibility layer?
// (Body missing — presumably excludes Input/Output/etc.; confirm upstream.)
306 auto MayNeedCompatibilityLayer = [](
const Layer& layer)
// Predicate: is this edge strategy one that inserts a compatibility layer?
315 auto IsCompatibilityStrategy = [](
EdgeStrategy strategy)
321 ForEachLayer([
this, &backends, &registry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](
Layer* srcLayer)
// Skip sources that can never need a compatibility layer.
325 if (!MayNeedCompatibilityLayer(*srcLayer))
331 const std::vector<OutputSlot>& srcOutputSlots = srcLayer->
GetOutputSlots();
332 for (
unsigned int srcOutputIndex = 0; srcOutputIndex < srcOutputSlots.size(); srcOutputIndex++)
// Connections and their per-connection edge strategies are parallel arrays.
335 const std::vector<InputSlot*> srcConnections = srcOutputSlot.
GetConnections();
336 const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.
GetEdgeStrategies();
337 for (
unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
339 InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
342 EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex];
// Assertion message for an Undefined strategy (assert macro line missing).
344 "Undefined memory strategy found while adding copy layers for compatibility");
// Only act when the destination may need one AND the strategy demands it.
347 if (MayNeedCompatibilityLayer(dstLayer) &&
348 IsCompatibilityStrategy(strategy))
// Name encodes "[ src (slot) -> dst (slot) ]" for debuggability.
353 const std::string compLayerName = fmt::format(
"[ {} ({}) -> {} ({}) ]",
358 Layer* compLayer =
nullptr;
// Strategy selects the layer kind: copy vs import (branch lines missing).
361 compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str());
366 compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str());
// Pick a tensor-handle factory for the new layer's output slot, preferring
// the destination backend's factories when it supports the allocator API.
371 OutputSlot& compOutputSlot = compLayer->GetOutputSlot(0);
372 auto backendIt = backends.find(dstLayer.
GetBackendId());
373 if (backendIt != backends.end() &&
375 backendIt->second->SupportsTensorAllocatorAPI())
377 auto backend = backendIt->second.get();
378 auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences();
381 for (
auto preference : tensorHandleFactoryIds)
383 auto factory = registry.
GetFactory(preference);
387 auto srcFactory = registry.
GetFactory(srcPref);
// Export/import is viable only when the flag sets intersect.
391 bool canExportImport =
392 (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
394 if (factory->SupportsMapUnmap() || canExportImport)
// InsertNewLayer rewired the connections, so re-find the compatibility
// layer's position among the (new) source connections to set its strategy.
418 const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.
GetConnections();
419 auto newSrcConnectionIndex = std::distance(newSourceConnections.begin(),
420 std::find(newSourceConnections.begin(),
421 newSourceConnections.end(),
422 &compLayer->GetInputSlot(0)));
425 srcOutputSlot.
SetEdgeStrategy(armnn::numeric_cast<unsigned int>(newSrcConnectionIndex),
// Fragment: Graph::SubstituteSubgraph(SubgraphView&, IConnectableLayer*).
// If the substitute layer is not already a member of this graph, adopt it
// (Reparent appends it to m_Layers) and mark the order dirty; then rewire
// the subgraph's boundary connections and erase the replaced layers.
449 if (std::find(std::begin(m_Layers),
451 iConnectableLayer) == std::end(m_Layers))
453 auto layer = PolymorphicDowncast<Layer*>(iConnectableLayer);
// Move the layer into this graph, placed at the end of the layer list.
454 layer->Reparent(*
this, m_Layers.end());
// The adopted layer invalidates the current topological ordering.
455 m_LayersInOrder =
false;
459 ReplaceSubgraphConnections(subgraph, substituteSubgraph);
460 EraseSubgraphLayers(subgraph);
// Fragment: Graph::ReplaceSubgraphConnections (plus, at the end, one stray
// line from EraseSubgraphLayers). Validates the substitute subgraph, then
// moves every boundary connection from the old subgraph's input/output slots
// onto the substitute's corresponding slots, index by index.
467 "New sub-graph used for substitution must not be empty");
// Every substitute layer must already belong to this graph.
470 std::for_each(substituteSubgraphLayers.begin(), substituteSubgraphLayers.end(), [&](
IConnectableLayer* layer)
473 layer = PolymorphicDowncast<Layer*>(layer);
474 ARMNN_ASSERT_MSG(std::find(m_Layers.begin(), m_Layers.end(), layer) != m_Layers.end(),
475 "Substitute layer is not a member of graph");
481 unsigned int subgraphNumInputSlots =
armnn::numeric_cast<
unsigned int>(subgraphInputSlots.size());
482 unsigned int subgraphNumOutputSlots =
armnn::numeric_cast<
unsigned int>(subgraphOutputSlots.size());
// The substitute must expose the same boundary slot counts as the original.
487 ARMNN_ASSERT(subgraphNumInputSlots == substituteSubgraphInputSlots.size());
488 ARMNN_ASSERT(subgraphNumOutputSlots == substituteSubgraphOutputSlots.size());
// Re-point each incoming connection: disconnect from the old subgraph's
// input slot, reconnect to the substitute's slot at the same index.
493 for (
unsigned int inputSlotIdx = 0; inputSlotIdx < subgraphNumInputSlots; ++inputSlotIdx)
495 IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
500 connectedOutputSlot->
Disconnect(*subgraphInputSlot);
502 IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
504 connectedOutputSlot->
Connect(*substituteInputSlot);
// Re-point each outgoing connection via MoveAllConnections, which transfers
// every consumer of the old output slot to the substitute's slot.
508 for(
unsigned int outputSlotIdx = 0; outputSlotIdx < subgraphNumOutputSlots; ++outputSlotIdx)
510 auto subgraphOutputSlot =
511 PolymorphicDowncast<OutputSlot*>(subgraphOutputSlots.at(outputSlotIdx));
514 auto substituteOutputSlot =
515 PolymorphicDowncast<OutputSlot*>(substituteSubgraphOutputSlots.at(outputSlotIdx));
518 subgraphOutputSlot->MoveAllConnections(*substituteOutputSlot);
// Stray line from EraseSubgraphLayers: downcast before erasing each layer.
527 auto layer = PolymorphicDowncast<Layer*>(iConnectableLayer);
// Fragment: Graph::InferTensorInfos. For each layer (in topological order,
// presumably) it verifies every output slot has a TensorInfo set and every
// input slot is connected, then asks the layer to validate/infer its output
// shapes from its inputs. Error-construction lines are partially missing.
543 for (
auto&& output: layer->GetOutputSlots())
545 if (!output.IsTensorInfoSet())
// Build a LayerValidationException message naming the offending layer.
547 std::ostringstream message;
548 message <<
"Output slot TensorInfo not set on " 564 for (
auto&& input : layer->GetInputSlots())
566 const IOutputSlot* source = input.GetConnectedOutputSlot();
// Unconnected input: delegate message construction to the helper below.
571 ConstructErrorMessageForUnconnectedInputs(layer, input.GetSlotIndex());
// Connected but source TensorInfo unset: report the source layer by name.
576 std::ostringstream message;
577 message <<
"Output slot TensorInfo not set on " 580 << std::quoted(layer->GetName());
// All slots verified: let the layer validate/propagate its output shapes.
587 layer->ValidateTensorShapesFromInputs();
// Fragment: Graph::ConstructErrorMessageForUnconnectedInputs. Builds (and
// presumably throws/logs — the final lines are missing) a message for an
// unconnected input slot, specialising the wording for layers with weight
// (slot 1) and bias (slot 2) inputs: "weights and bias", "weights", or
// "bias" depending on which sources are absent.
599 void Graph::ConstructErrorMessageForUnconnectedInputs(
Layer*
const layer,
600 unsigned int slotIndex)
602 std::ostringstream message;
603 bool noWeightsAndBias =
false;
// Both weights and bias missing.
// NOTE(review): `NULL` — prefer `nullptr` in modern C++; flagged only, as
// this extraction is too incomplete to rewrite safely.
612 if (biasSource == NULL)
614 message << layer->
GetName() <<
" layer weights and bias not set: ";
615 noWeightsAndBias =
true;
// Only reached when NOT both missing: report whichever single one is unset.
620 if (!noWeightsAndBias)
624 message << layer->
GetName() <<
" layer weights not set: ";
628 message << layer->
GetName() <<
" layer bias not set: ";
// Slots 1 & 2 when both inputs are missing, else the single failing index.
633 std::string slotString = noWeightsAndBias ?
"1 & 2" : std::to_string(slotIndex);
634 message <<
"Input slot(s) " 636 <<
" not connected to an output slot on " 639 << std::quoted(layer->
GetName());
Graph(bool shapeInferenceMethod=false)
const std::vector< InputSlot * > & GetConnections() const
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
NodeContent & AddContent(const std::string &content)
const IOutputSlots & GetIOutputSlots() const
void SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy)
No strategy has been defined. Used internally to verify integrity of optimizations.
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
const TensorShape & GetShape() const
Status SerializeToDot(std::ostream &stream)
const IConnectableLayers & GetIConnectableLayers() const
const IInputSlots & GetIInputSlots() const
const std::vector< EdgeStrategy > & GetEdgeStrategies() const
Layer & GetOwningLayer() const
Source backend's tensor data can be exported to the destination backend's tensor without copy...
void EraseLayer(Iterator pos)
Deletes the layer at the specified position.
DotAttributeSet & GetAttributeSet()
virtual void Allocate()=0
Indicate to the memory manager that this resource is no longer active.
virtual Layer * Clone(Graph &graph) const =0
Creates a dynamically-allocated copy of this layer.
#define ARMNN_LOG(severity)
virtual void Manage()=0
Indicate to the memory manager that this resource is active.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Destination backend can work directly with tensors on source backend.
The SubgraphView class represents a subgraph of a Graph.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
void ForEachLayer(Func func) const
std::list< IConnectableLayer * > IConnectableLayers
#define ARMNN_ASSERT_MSG(COND, MSG)
DotAttributeSet & AddAttribute(const std::string &name, const std::stringstream &value)
NodeContent & GetContents()
An output connection slot for a layer.
std::vector< IOutputSlot * > IOutputSlots
virtual void Disconnect(IInputSlot &slot)=0
virtual ITensorHandle * GetParent() const =0
Get the parent tensor if this is a subtensor.
Validate all output shapes.
void ForEachIConnectableLayer(Func func) const
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
std::vector< IInputSlot * > IInputSlots
#define ARMNN_ASSERT(COND)
const BackendId & GetBackendId() const
const std::vector< OutputSlot > & GetOutputSlots() const
void VerifyConstantLayerSetTensorInfo() const
For each ConstantLayer in Graph, ensures TensorInfo is set on all output slots.
void SubstituteSubgraph(SubgraphView &subgraph, IConnectableLayer *substituteLayer)
Substitutes the given sub-graph with either a new layer or a new sub-graph.
const std::shared_ptr< IProfiler > & GetProfiler() const
void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId &id)
std::vector< OutputSlot >::iterator BeginOutputSlots()
profiling::ProfilingGuid LayerGuid
Define LayerGuid type.
ITensorHandleFactory * GetFactory(ITensorHandleFactory::FactoryId id) const
Find a TensorHandleFactory by Id. Returns nullptr if not found.
virtual bool IsTensorInfoSet() const =0
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
DotAttributeSet & GetAttributeSet()
const char * GetName() const override
Returns the name of the layer.
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
Graph & TopologicalSort()
Sorts layers in topological order and returns this.
virtual int Connect(IInputSlot &destination)=0
std::function< void(const std::string &name, const std::string &value)> ParameterStringifyFunction
Status AllocateDynamicBuffers()
Allocates memory for all tensors under output tensor handlers of each layer.
const TensorInfo & GetTensorInfo() const override
const char * GetLayerTypeAsCString(LayerType type)
void AddCompatibilityLayers(std::map< BackendId, std::unique_ptr< class IBackendInternal >> &backends, TensorHandleFactoryRegistry ®istry)
Modifies the graph in-place, removing edges connecting layers using different compute devices...
static const FactoryId LegacyFactoryId
LayerGuid GetGuid() const final
Returns the unique id of the layer.