about · summary · refs · log · tree · commit · diff
path: root/src/armnn
diff options
context:
space:
mode:
author    Mike Kelly <mike.kelly@arm.com>  2023-07-07 15:43:06 +0100
committer Mike Kelly <mike.kelly@arm.com>  2023-07-14 00:00:53 +0100
commit4cc341cf8b5a6e6bb0543504cbbfde6fa11a2cdb (patch)
tree7cac128e9ec6f2fd27f1afdb55f44b870f39e0b3 /src/armnn
parent6963b33221c23af4a8eff19ff4a5773230b0befd (diff)
downloadarmnn-4cc341cf8b5a6e6bb0543504cbbfde6fa11a2cdb.tar.gz
IVGCVSW-7830 Add backend optimizations to remove Reshapes where possible
* Added optimization to remove reshapes for Neon and Ref Backends by using overridden TensorInfos
* Added ability to delete Subgraphs during Optimization
* Fixed naming error in NeonEndToEndTests and CLEndToEndTests
* Added LayerNameAndTypeCheck for testing.
* Fixed error where layers were not marked as altered when removed in CLBackend

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I1ac25cd4ec9821470d961831ae2c8d24882276cc
Diffstat (limited to 'src/armnn')
-rw-r--r--  src/armnn/Graph.cpp                  10
-rw-r--r--  src/armnn/Layer.cpp                  15
-rw-r--r--  src/armnn/Layer.hpp                   8
-rw-r--r--  src/armnn/LoadedNetwork.cpp          28
-rw-r--r--  src/armnn/Network.cpp                22
-rw-r--r--  src/armnn/layers/ConcatLayer.cpp      3
-rw-r--r--  src/armnn/layers/ReverseV2Layer.cpp   2
-rw-r--r--  src/armnn/layers/SplitterLayer.cpp    5
8 files changed, 78 insertions, 15 deletions
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index fee1da4343..cf6f20f82b 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -508,12 +508,22 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const Subgr
if (subgraphInputSlot->GetConnection())
{
IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
+ InputSlot* inputSlot = PolymorphicDowncast<InputSlot*>(subgraphInputSlot);
+ bool isOverridden = inputSlot->IsTensorInfoOverridden();
+
ARMNN_ASSERT(connectedOutputSlot);
connectedOutputSlot->Disconnect(*subgraphInputSlot);
IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
ARMNN_ASSERT(substituteInputSlot);
connectedOutputSlot->Connect(*substituteInputSlot);
+
+ if (isOverridden)
+ {
+ TensorInfo overridden = inputSlot->GetTensorInfo();
+ InputSlot* newInputSlot = PolymorphicDowncast<InputSlot*>(substituteInputSlot);
+ newInputSlot->SetTensorInfo(overridden);
+ }
}
}
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index 8d4811ae89..d2f8f2c982 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -259,7 +259,20 @@ void Layer::CollectWorkloadInputs(WorkloadDataCollector& dataCollector) const
// The graph must be well-formed at this point.
ARMNN_ASSERT(inputSlot.GetConnection());
const OutputHandler& outputHandler = inputSlot.GetConnectedOutputSlot()->GetOutputHandler();
- dataCollector.Push(outputHandler.GetData(), outputHandler.GetTensorInfo());
+
+ if (inputSlot.IsTensorInfoOverridden() && outputHandler.GetData())
+ {
+ auto handler = outputHandler.GetData()->DecorateTensorHandle(inputSlot.GetTensorInfo());
+
+ if (handler)
+ {
+ // Add overridden TensorHandle
+ dataCollector.Push(handler.get(), inputSlot.GetTensorInfo());
+ continue;
+ }
+ }
+ // Add default TensorHandle
+ dataCollector.Push(outputHandler.GetData(), inputSlot.GetTensorInfo());
}
}
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index 5e097f0fad..4f69e78b62 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -80,15 +80,15 @@ public:
/// Sets the TensorInfo for this InputSlot. This can be used to override the TensorInfo and if set will be returned
/// instead of the TensorInfo for the Connected OutputSlot.
- void SetTensorInfo(const TensorInfo tensorInfo);
+ void SetTensorInfo(const TensorInfo tensorInfo) override;
/// Gets the TensorInfo for this InputSlot. If the InputSlot's TensorInfo has not been set then this will get the
/// TensorInfo from the Connected TensorInfo.
- const TensorInfo& GetTensorInfo() const;
+ const TensorInfo& GetTensorInfo() const override;
/// Returns true if this InputSlot either has an overridden TensorInfo for this InputSlot that was set through a
/// call to SetTensorInfo() or is Connected to an OutputSlot that has its TensorInfo set.
- bool IsTensorInfoSet() const;
+ bool IsTensorInfoSet() const override;
/// Returns true if this InputSlot has an overridden TensorInfo that was set through a call to SetTensorInfo().
- bool IsTensorInfoOverridden() const;
+ bool IsTensorInfoOverridden() const override;
private:
Layer& m_OwningLayer;
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 3f4aa34a5b..3d84054b69 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -955,10 +955,10 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
syncDesc.m_Inputs.push_back(inputTensorHandle);
WorkloadInfo info;
info.m_InputTensorInfos.push_back(
- outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
+ outputLayer->GetInputSlot(0).GetTensorInfo());
auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
- m_OutputQueue.push_back(move(syncWorkload));
+ m_OutputQueue.push_back(std::move(syncWorkload));
importedOutputIdIndex++;
}
else
@@ -1089,7 +1089,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
timelineUtils->Commit();
}
- m_InputQueue.push_back(move(inputWorkload));
+ m_InputQueue.push_back(std::move(inputWorkload));
}
}
@@ -1149,7 +1149,7 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
info.m_InputTensorInfos.push_back(inputTensorInfo);
auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
- m_OutputQueue.push_back(move(syncWorkload));
+ m_OutputQueue.push_back(std::move(syncWorkload));
}
else
{
@@ -1177,7 +1177,7 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
timelineUtils->Commit();
}
- m_OutputQueue.push_back(move(outputWorkload));
+ m_OutputQueue.push_back(std::move(outputWorkload));
}
}
@@ -1650,7 +1650,7 @@ std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors&
const InputSlot& inputSlot = layer->GetInputSlots()[0];
ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
- const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();
+ const TensorInfo& tensorInfo = inputSlot.GetTensorInfo();
ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
ARMNN_ASSERT(handleFactory);
@@ -2093,6 +2093,14 @@ std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(Network
if (found != m_ConstantTensorHandles.end())
{
ITensorHandle* tensorHandle = found->second;
+ if (slot.IsTensorInfoOverridden())
+ {
+ ITensorHandle* decorated = tensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
+ if (decorated)
+ {
+ tensorHandle = decorated;
+ }
+ }
workingMemDescriptor.m_Inputs.push_back(tensorHandle);
// Odd case where a constant layer is connected to an output layer
@@ -2113,6 +2121,14 @@ std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(Network
HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
+ if (slot.IsTensorInfoOverridden())
+ {
+ ITensorHandle* decorated = inputTensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
+ if (decorated)
+ {
+ inputTensorHandle = decorated;
+ }
+ }
workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
// Store the LayerBindingId of the OutputLayer
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 2abaf44587..ae5bde17ca 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1310,6 +1310,28 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr,
});
}
+ // Remove deleted sub-graphs
+ for (auto& deletedSubgraph : optimizationViews.GetDeletedSubgraphs())
+ {
+ for (auto& l : deletedSubgraph.GetIConnectableLayers())
+ {
+ Layer* deletedLayer = PolymorphicDowncast<Layer*>(l);
+ for (unsigned int in = deletedLayer->GetNumInputSlots(); in > 0; --in)
+ {
+ auto inputSlot = deletedLayer->GetInputSlot(in -1);
+ OutputSlot* parentOut = inputSlot.GetConnectedOutputSlot();
+ parentOut->Disconnect(inputSlot);
+ for (unsigned int out = deletedLayer->GetOutputSlot(in -1).GetNumConnections(); out > 0; --out)
+ {
+ InputSlot *childIn = deletedLayer->GetOutputSlot(in - 1).GetConnection(out -1);
+ deletedLayer->GetOutputSlot(in - 1).Disconnect(*childIn);
+ parentOut->Connect(*childIn);
+ }
+ }
+ optGraph.EraseLayer(deletedLayer);
+ }
+ }
+
if (!optimizationViews.GetFailedSubgraphs().empty())
{
std::stringstream warningMsg;
diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp
index 7a1b689b2c..4629bf245e 100644
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -120,7 +120,7 @@ void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry,
// 3) the input does not come from a Constant layer or input layer
// 4) the input is only read by this concat layer
// 5) if concat along x or y (2 innermost dimensions) and the previous layers do not require padding
- // 6) the input does not have an Overridden TensorInfo
+ // 6) neither the inputs nor the output have an Overridden TensorInfo
if (slot &&
parentInfo.IsTypeSpaceMatch(info) && //(1)
factoryId == slot->GetTensorHandleFactoryId() && //(2)
@@ -128,6 +128,7 @@ void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry,
slot->GetOwningLayer().GetType() != LayerType::Input && //(3)
slot->GetNumConnections() == 1 &&
canUseSubTensorOnXorY && //(5)
+ !GetOutputSlot(0).GetConnection(0)->IsTensorInfoOverridden() && //(6)
!currentLayer->GetInputSlot(i).IsTensorInfoOverridden()) //(6)
{
ARMNN_NO_DEPRECATE_WARN_BEGIN
diff --git a/src/armnn/layers/ReverseV2Layer.cpp b/src/armnn/layers/ReverseV2Layer.cpp
index 29f8b1b781..201e19819b 100644
--- a/src/armnn/layers/ReverseV2Layer.cpp
+++ b/src/armnn/layers/ReverseV2Layer.cpp
@@ -40,7 +40,7 @@ void ReverseV2Layer::ValidateTensorShapesFromInputs()
VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod);
auto inferredShapes = InferOutputShapes({
- GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
+ GetInputSlot(0).GetTensorInfo().GetShape() });
ARMNN_ASSERT(inferredShapes.size() == 1);
diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp
index 86a42305ff..dc8864a736 100644
--- a/src/armnn/layers/SplitterLayer.cpp
+++ b/src/armnn/layers/SplitterLayer.cpp
@@ -131,13 +131,14 @@ void SplitterLayer::CreateTensors(const TensorHandleFactoryRegistry& registry,
// 2) the same TensorHandleFactory is used for input and split layer output
// 3) the output does not go to a Constant layer or input layer
// 4) if split along x or y (2 innermost dimensions) and the next layers do not require padding
- // 5) none of the outputs have an Overridden TensorInfo
+ // 5) neither the input nor the outputs have an Overridden TensorInfo
if (parentInfo.IsTypeSpaceMatch(info) && //(1)
factoryId == slot->GetTensorHandleFactoryId() && //(2)
GetOutputSlot(i).GetConnection(0)->GetOwningLayer().GetType() != LayerType::Constant && //(3)
GetOutputSlot(i).GetConnection(0)->GetOwningLayer().GetType() != LayerType::Input && //(3)
canUseSubTensorOnXorY && //(4)
- !GetOutputSlot(i).GetConnection(0)->IsTensorInfoOverridden()) //(5)
+ !GetOutputSlot(i).GetConnection(0)->IsTensorInfoOverridden() && //(5)
+ !GetInputSlot(0).IsTensorInfoOverridden()) //(5)
{
ARMNN_NO_DEPRECATE_WARN_BEGIN
return factory.CreateSubTensorHandle(*inputData,