aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/LoadedNetwork.cpp
diff options
context:
space:
mode:
author: surmeh01 <surabhi.mehta@arm.com> 2018-05-18 16:31:43 +0100
committer: telsoa01 <telmo.soares@arm.com> 2018-05-23 13:09:07 +0100
commit: 3537c2ca7ebf31c1673b9ec2bb0c17b0406bbae0 (patch)
tree: 5950603ad78ec3fe56fb31ddc7f4d52a19f5bc60 /src/armnn/LoadedNetwork.cpp
parent: bceff2fb3fc68bb0aa88b886900c34b77340c826 (diff)
download: armnn-3537c2ca7ebf31c1673b9ec2bb0c17b0406bbae0.tar.gz
Release 18.05
Diffstat (limited to 'src/armnn/LoadedNetwork.cpp')
-rw-r--r--  src/armnn/LoadedNetwork.cpp  57
1 file changed, 30 insertions(+), 27 deletions(-)
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 14712d209c..3c73d4ccfe 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -5,11 +5,11 @@
#include "LoadedNetwork.hpp"
#include "Layer.hpp"
-#include "Layers.hpp"
#include "Graph.hpp"
#include "Network.hpp"
#include "Runtime.hpp"
#include "Profiling.hpp"
+#include "HeapProfiling.hpp"
#ifdef ARMCOMPUTECL_ENABLED
#include <arm_compute/core/CL/OpenCL.h>
@@ -28,13 +28,13 @@ namespace armnn
using namespace std;
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
- const WorkloadFactories& workloadFactories)
+ bool useCpuRefAsFallback)
{
std::unique_ptr<LoadedNetwork> loadedNetwork;
try
{
- loadedNetwork.reset(new LoadedNetwork(std::move(net), workloadFactories));
+ loadedNetwork.reset(new LoadedNetwork(std::move(net), useCpuRefAsFallback));
}
catch (const std::runtime_error& error)
{
@@ -58,8 +58,9 @@ std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<
return loadedNetwork;
}
-LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const WorkloadFactories& workloadFactories)
-: m_OptimizedNetwork(std::move(net))
+LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, bool useCpuRefAsFallback)
+ : m_CpuRef(useCpuRefAsFallback)
+ , m_OptimizedNetwork(std::move(net))
{
Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
//first create tensor handlers
@@ -68,13 +69,13 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const Worklo
//(for example the splitter and merger layers)
for (auto&& layer : order)
{
- layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), *GetWorkloadFactory(*layer, workloadFactories));
+ layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer));
}
//then create workloads
for (auto&& layer : order)
{
- const shared_ptr<IWorkloadFactory> workloadFactory = GetWorkloadFactory(*layer, workloadFactories);
+ const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
switch (layer->GetType())
{
@@ -86,7 +87,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const Worklo
}
default:
{
- auto workload = layer->CreateWorkload(m_OptimizedNetwork->GetGraph(), *workloadFactory);
+ auto workload = layer->CreateWorkload(m_OptimizedNetwork->GetGraph(), workloadFactory);
if (!workload)
{
@@ -105,6 +106,11 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const Worklo
// set up memory
m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();
+
+ // finalize the workload factories before execution
+ m_CpuRef.Finalize();
+ m_CpuAcc.Finalize();
+ m_GpuAcc.Finalize();
}
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
@@ -136,27 +142,26 @@ TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
}
-const shared_ptr<IWorkloadFactory> LoadedNetwork::GetWorkloadFactory(const Layer& layer,
- const WorkloadFactories& workloadFactories) const
+const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
- shared_ptr<IWorkloadFactory> workloadFactory;
+ const IWorkloadFactory* workloadFactory = nullptr;
switch (layer.GetComputeDevice())
{
case Compute::CpuAcc:
{
- workloadFactory = workloadFactories.m_CpuAcc;
+ workloadFactory = &m_CpuAcc;
break;
}
case Compute::GpuAcc:
{
- workloadFactory = workloadFactories.m_GpuAcc;
+ workloadFactory = &m_GpuAcc;
break;
}
case Compute::CpuRef:
default:
{
- workloadFactory = workloadFactories.m_CpuRef;
+ workloadFactory = &m_CpuRef;
break;
}
}
@@ -168,7 +173,7 @@ const shared_ptr<IWorkloadFactory> LoadedNetwork::GetWorkloadFactory(const Layer
"Factory does not support layer");
boost::ignore_unused(reasonIfUnsupported);
- return workloadFactory;
+ return *workloadFactory;
}
namespace {
@@ -266,8 +271,7 @@ private:
}
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
- const OutputTensors& outputTensors,
- const WorkloadFactories& workloadFactories)
+ const OutputTensors& outputTensors)
{
ARMNN_UPDATE_PROFILING_EVENT_TAG();
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");
@@ -293,20 +297,21 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
for (const BindableLayer* inputLayer : graph.GetInputLayers())
{
const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
- EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo(), workloadFactories);
+ EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
}
// for each output to the network, call EnqueueOutput with the data passed by the user
for (const BindableLayer* outputLayer : graph.GetOutputLayers())
{
const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
- EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo(), workloadFactories);
+ EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
}
bool executionSucceeded = true;
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
+ ARMNN_SCOPED_HEAP_PROFILING("Executing");
executionSucceeded = Execute();
}
@@ -316,8 +321,7 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
return executionSucceeded ? Status::Success : Status::Failure;
}
-void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo,
- const WorkloadFactories& workloadFactories)
+void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
if (layer.GetType() != LayerType::Input)
{
@@ -344,14 +348,13 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
info.m_OutputTensorInfos.push_back(outputTensorInfo);
- shared_ptr<IWorkloadFactory> workloadFactory = GetWorkloadFactory(layer, workloadFactories);
- auto inputWorkload = workloadFactory->CreateInput(inputQueueDescriptor, info);
+ const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
+ auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info);
BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
m_WorkloadQueue.insert(m_WorkloadQueue.begin(), move(inputWorkload));
}
-void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle,
- const TensorInfo& tensorInfo, const WorkloadFactories& workloadFactories)
+void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
if (layer.GetType() != LayerType::Output)
{
@@ -381,8 +384,8 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
info.m_InputTensorInfos.push_back(inputTensorInfo);
- shared_ptr<IWorkloadFactory> workloadFactory = GetWorkloadFactory(layer, workloadFactories);
- auto outputWorkload = workloadFactory->CreateOutput(outputQueueDescriptor, info);
+ const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
+ auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info);
BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
m_WorkloadQueue.push_back(move(outputWorkload));
}