author    Sadik Armagan <sadik.armagan@arm.com>  2021-04-27 10:02:10 +0100
committer Sadik Armagan <sadik.armagan@arm.com>  2021-04-29 08:46:09 +0000
commit    a04a9d7c11f28c7e932435535e80223782f369f2 (patch)
tree      9c1e86b0b4878dad12a359e60a8d2e8e051d2def
parent    484d5ebb00c0db76efd76a601b5bbaa460cd2ccb (diff)
download  armnn-a04a9d7c11f28c7e932435535e80223782f369f2.tar.gz
IVGCVSW-5775 'Add Async Support to ExecuteNetwork'
* Enabled async mode with '-n, concurrent' and 'simultaneous-iterations' in ExecuteNetwork
* The number of input data files provided must equal the number of network inputs multiplied by the number of simultaneous iterations, passed as a comma-separated list

!armnn:5443

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: Ibeb318010430bf4ae61a02b18b1bf88f3657774c
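For illustration, a hypothetical invocation sketch for a network with two inputs run over three simultaneous iterations (model and backend options elided, file names made up; only -n, --simultaneous-iterations and -d are taken from this patch), so 2 inputs x 3 iterations = 6 comma-separated data files are expected:

    ExecuteNetwork <model and backend options> -n --simultaneous-iterations 3 \
        -d in0_iter0.raw,in1_iter0.raw,in0_iter1.raw,in1_iter1.raw,in0_iter2.raw,in1_iter2.raw

The files are consumed in iteration-major order: entry (j * numInputs) + i in the list feeds input i of iteration j, matching the indexing added to ExecuteNetwork.cpp below.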
-rw-r--r--include/armnn/IWorkingMemHandle.hpp3
-rw-r--r--src/armnn/WorkingMemHandle.cpp3
-rw-r--r--src/armnn/WorkingMemHandle.hpp6
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp8
-rw-r--r--tests/ExecuteNetwork/ExecuteNetwork.cpp253
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkParams.cpp23
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkParams.hpp2
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp10
-rw-r--r--tests/InferenceModel.hpp71
9 files changed, 290 insertions, 89 deletions
diff --git a/include/armnn/IWorkingMemHandle.hpp b/include/armnn/IWorkingMemHandle.hpp
index 171fa3d81c..6fb2f9fe5f 100644
--- a/include/armnn/IWorkingMemHandle.hpp
+++ b/include/armnn/IWorkingMemHandle.hpp
@@ -25,6 +25,9 @@ public:
/// Returns the NetworkId of the Network that this IWorkingMemHandle works with.
virtual NetworkId GetNetworkId() = 0;
+ /// Returns the InferenceId of the Inference that this IWorkingMemHandle works with.
+ virtual profiling::ProfilingGuid GetInferenceId() = 0;
+
/// Allocate the backing memory required for execution. If this is not called, then allocation will be
/// deferred to execution time. The mutex must be locked.
virtual void Allocate() = 0;
diff --git a/src/armnn/WorkingMemHandle.cpp b/src/armnn/WorkingMemHandle.cpp
index 0cbef82e83..b54c5baddd 100644
--- a/src/armnn/WorkingMemHandle.cpp
+++ b/src/armnn/WorkingMemHandle.cpp
@@ -26,7 +26,8 @@ WorkingMemHandle::WorkingMemHandle(
m_MemoryManagers(memoryManagers),
m_OwnedTensorHandles(std::move(ownedTensorHandles)),
m_IsAllocated(false),
- m_Mutex()
+ m_Mutex(),
+ m_InferenceId(profiling::ProfilingService::GetNextGuid())
{
}
diff --git a/src/armnn/WorkingMemHandle.hpp b/src/armnn/WorkingMemHandle.hpp
index 92b0acaec3..5ccb2b2342 100644
--- a/src/armnn/WorkingMemHandle.hpp
+++ b/src/armnn/WorkingMemHandle.hpp
@@ -38,6 +38,11 @@ public:
return m_NetworkId;
}
+ profiling::ProfilingGuid GetInferenceId() override
+ {
+ return m_InferenceId;
+ }
+
/// Allocate the backing memory required for execution. If this is not called, then allocation will be
/// deferred to execution time. The mutex must be locked.
void Allocate() override;
@@ -87,6 +92,7 @@ private:
bool m_IsAllocated;
std::mutex m_Mutex;
+ profiling::ProfilingGuid m_InferenceId;
};
} // end experimental namespace
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
index 25c326ad37..6784e21585 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
@@ -46,10 +46,10 @@ void RefDetectionPostProcessWorkload::Execute(std::vector<ITensorHandle*> inputs
auto scores = MakeDecoder<float>(scoresInfo, inputs[1]->Map());
auto anchors = MakeDecoder<float>(anchorsInfo, m_Anchors->Map(false));
- float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
- float* detectionClasses = GetOutputTensorData<float>(1, m_Data);
- float* detectionScores = GetOutputTensorData<float>(2, m_Data);
- float* numDetections = GetOutputTensorData<float>(3, m_Data);
+ float* detectionBoxes = reinterpret_cast<float*>(outputs[0]->Map());
+ float* detectionClasses = reinterpret_cast<float*>(outputs[1]->Map());
+ float* detectionScores = reinterpret_cast<float*>(outputs[2]->Map());
+ float* numDetections = reinterpret_cast<float*>(outputs[3]->Map());
DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
detectionBoxesInfo, detectionClassesInfo,
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 60e4ec3401..2bbb51783c 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -279,7 +279,8 @@ int MainImpl(const ExecuteNetworkParams& params,
using TContainer =
mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
- std::vector<TContainer> inputDataContainers;
+ std::vector<std::vector<TContainer>> inputs;
+ std::vector<std::vector<TContainer>> outputs;
try
{
@@ -298,6 +299,7 @@ int MainImpl(const ExecuteNetworkParams& params,
inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
+ inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
for(const std::string& inputName: params.m_InputNames)
{
@@ -324,106 +326,201 @@ int MainImpl(const ExecuteNetworkParams& params,
runtime);
const size_t numInputs = inferenceModelParams.m_InputBindings.size();
- for(unsigned int i = 0; i < numInputs; ++i)
+
+ armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
+ armnn::MakeOptional<QuantizationParams>(
+ model.GetInputQuantizationParams()) :
+ armnn::EmptyOptional();
+
+ for(unsigned int j = 0; j < params.m_SimultaneousIterations ; ++j)
{
- armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
- armnn::MakeOptional<QuantizationParams>(
- model.GetInputQuantizationParams()) :
- armnn::EmptyOptional();
-
- armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
- armnn::EmptyOptional() :
- armnn::MakeOptional<std::string>(
- params.m_InputTensorDataFilePaths[i]);
-
- unsigned int numElements = model.GetInputSize(i);
- if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
+ std::vector<TContainer> inputDataContainers;
+ for(unsigned int i = 0; i < numInputs; ++i)
{
- // If the user has provided a tensor shape for the current input,
- // override numElements
- numElements = params.m_InputTensorShapes[i]->GetNumElements();
- }
+ armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
+ armnn::EmptyOptional() :
+ armnn::MakeOptional<std::string>(
+ params.m_InputTensorDataFilePaths[(j * numInputs) + i]);
- TContainer tensorData;
- PopulateTensorWithData(tensorData,
- numElements,
- params.m_InputTypes[i],
- qParams,
- dataFile);
+ unsigned int numElements = model.GetInputSize(i);
+ if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
+ {
+ // If the user has provided a tensor shape for the current input,
+ // override numElements
+ numElements = params.m_InputTensorShapes[i]->GetNumElements();
+ }
+
+ TContainer tensorData;
+ PopulateTensorWithData(tensorData,
+ numElements,
+ params.m_InputTypes[i],
+ qParams,
+ dataFile);
- inputDataContainers.push_back(tensorData);
+ inputDataContainers.push_back(tensorData);
+ }
+ inputs.push_back(inputDataContainers);
}
const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
- std::vector<TContainer> outputDataContainers;
- for (unsigned int i = 0; i < numOutputs; ++i)
+ for (unsigned int j = 0; j < params.m_SimultaneousIterations; ++j)
{
- if (params.m_OutputTypes[i].compare("float") == 0)
- {
- outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
- }
- else if (params.m_OutputTypes[i].compare("int") == 0)
- {
- outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
- }
- else if (params.m_OutputTypes[i].compare("qasymm8") == 0)
- {
- outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
- }
- else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
+ std::vector <TContainer> outputDataContainers;
+ for (unsigned int i = 0; i < numOutputs; ++i)
{
- outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
- }
- else
- {
- ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
- return EXIT_FAILURE;
+ if (params.m_OutputTypes[i].compare("float") == 0)
+ {
+ outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
+ } else if (params.m_OutputTypes[i].compare("int") == 0)
+ {
+ outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
+ } else if (params.m_OutputTypes[i].compare("qasymm8") == 0)
+ {
+ outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
+ } else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
+ {
+ outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
+ } else
+ {
+ ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
+ return EXIT_FAILURE;
+ }
}
+ outputs.push_back(outputDataContainers);
}
- for (size_t x = 0; x < params.m_Iterations; x++)
+ if (!params.m_Concurrent)
{
- // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
- auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
-
- if (params.m_GenerateTensorData)
+ // Synchronous Execution
+ for (size_t x = 0; x < params.m_Iterations; x++)
{
- ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
- }
+ // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
+ auto inference_duration = model.Run(inputs[0], outputs[0]);
- // Print output tensors
- const auto& infosOut = model.GetOutputBindingInfos();
- for (size_t i = 0; i < numOutputs; i++)
- {
- const armnn::TensorInfo& infoOut = infosOut[i].second;
- auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
-
- TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
- infoOut,
- outputTensorFile,
- params.m_DequantizeOutput);
- mapbox::util::apply_visitor(printer, outputDataContainers[i]);
- }
+ if (params.m_GenerateTensorData)
+ {
+ ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
+ }
+
+ // Print output tensors
+ const auto& infosOut = model.GetOutputBindingInfos();
+ for (size_t i = 0; i < numOutputs; i++)
+ {
+ const armnn::TensorInfo& infoOut = infosOut[i].second;
+ auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
+
+ TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
+ infoOut,
+ outputTensorFile,
+ params.m_DequantizeOutput);
+ mapbox::util::apply_visitor(printer, outputs[0][i]);
+ }
- ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
- << std::fixed << inference_duration.count() << " ms\n";
+ ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
+ << std::fixed << inference_duration.count() << " ms\n";
- // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
- if (params.m_ThresholdTime != 0.0)
+ // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+ if (params.m_ThresholdTime != 0.0)
+ {
+ ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
+ << std::fixed << params.m_ThresholdTime << " ms";
+ auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
+ ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
+ << std::fixed << thresholdMinusInference << " ms" << "\n";
+
+ if (thresholdMinusInference < 0)
+ {
+ std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
+ ARMNN_LOG(fatal) << errorMessage;
+ }
+ }
+ }
+ }
+ else
+ {
+ try
{
- ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
- << std::fixed << params.m_ThresholdTime << " ms";
- auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
- ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
- << std::fixed << thresholdMinusInference << " ms" << "\n";
+ ARMNN_LOG(info) << "Asynchronous Execution... \n";
+ std::vector<std::future<std::tuple<armnn::profiling::ProfilingGuid,
+ std::chrono::duration<double, std::milli>>>> inferenceResults;
+ inferenceResults.reserve(params.m_SimultaneousIterations);
+
+ // Create WorkingMemHandles for each inference
+ std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
+ workingMemHandles.reserve(params.m_SimultaneousIterations);
+ for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+ {
+ workingMemHandles.push_back(model.CreateWorkingMemHandle());
+ }
+
+ // Run each inference in its own thread
+ for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+ {
+ armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
+ inferenceResults.push_back(std::async(
+ std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
+ return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i]);
+ }
+ ));
+ }
- if (thresholdMinusInference < 0)
+ // Check the results
+ for (unsigned int j = 0; j < inferenceResults.size(); ++j)
{
- std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
- ARMNN_LOG(fatal) << errorMessage;
+ // Get the results
+ auto inferenceResult = inferenceResults[j].get();
+ auto inference_duration = std::get<1>(inferenceResult);
+ auto inferenceID = std::get<0>(inferenceResult);
+
+ if (params.m_GenerateTensorData)
+ {
+ ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
+ }
+
+ // Print output tensors
+ const auto& infosOut = model.GetOutputBindingInfos();
+ for (size_t i = 0; i < numOutputs; i++)
+ {
+ const armnn::TensorInfo& infoOut = infosOut[i].second;
+ auto outputTensorFile = params.m_OutputTensorFiles.empty()
+ ? ""
+ : params.m_OutputTensorFiles[(j * numOutputs) + i];
+
+ TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
+ infoOut,
+ outputTensorFile,
+ params.m_DequantizeOutput);
+ mapbox::util::apply_visitor(printer, outputs[j][i]);
+ }
+
+ ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
+ << std::fixed << inference_duration.count() << " ms\n";
+
+ // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+ if (params.m_ThresholdTime != 0.0)
+ {
+ ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
+ << std::fixed << params.m_ThresholdTime << " ms";
+ auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
+ ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
+ << std::fixed << thresholdMinusInference << " ms" << "\n";
+
+ if (thresholdMinusInference < 0)
+ {
+ ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
+ }
+ }
+ ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
+
}
}
+ catch (const armnn::Exception& e)
+ {
+ ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
+ return EXIT_FAILURE;
+ }
+
}
}
catch (const armnn::Exception& e)
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
index 4e3b5e313d..8f1cb0b599 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
@@ -145,6 +145,12 @@ void ExecuteNetworkParams::ValidateParams()
CheckModelFormat(m_ModelFormat);
+ // Check number of simultaneous iterations
+ if ((m_SimultaneousIterations < 1))
+ {
+ ARMNN_LOG(fatal) << "simultaneous-iterations cannot be less than 1. ";
+ }
+
// Check input tensor shapes
if ((m_InputTensorShapes.size() != 0) &&
(m_InputTensorShapes.size() != m_InputNames.size()))
@@ -159,10 +165,19 @@ void ExecuteNetworkParams::ValidateParams()
ARMNN_LOG(fatal) << "One or more input data file paths are not valid. ";
}
- if (m_InputTensorDataFilePaths.size() != m_InputNames.size())
+ if (!m_Concurrent && m_InputTensorDataFilePaths.size() != m_InputNames.size())
{
ARMNN_LOG(fatal) << "input-name and input-tensor-data must have the same amount of elements. ";
}
+
+ if (m_InputTensorDataFilePaths.size() < m_SimultaneousIterations * m_InputNames.size())
+ {
+ ARMNN_LOG(fatal) << "There is not enough input data for " << m_SimultaneousIterations << " execution.";
+ }
+ if (m_InputTensorDataFilePaths.size() > m_SimultaneousIterations * m_InputNames.size())
+ {
+ ARMNN_LOG(fatal) << "There is more input data for " << m_SimultaneousIterations << " execution.";
+ }
}
if ((m_OutputTensorFiles.size() != 0) &&
@@ -171,6 +186,12 @@ void ExecuteNetworkParams::ValidateParams()
ARMNN_LOG(fatal) << "output-name and write-outputs-to-file must have the same amount of elements. ";
}
+ if ((m_OutputTensorFiles.size() != 0)
+ && m_OutputTensorFiles.size() != m_SimultaneousIterations * m_OutputNames.size())
+ {
+ ARMNN_LOG(fatal) << "There is not enough output data for " << m_SimultaneousIterations << " execution.";
+ }
+
if (m_InputTypes.size() == 0)
{
//Defaults the value of all inputs to "float"
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index a19eaa9346..c325df110f 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -23,6 +23,7 @@ struct ExecuteNetworkParams
std::string m_CachedNetworkFilePath;
std::vector<armnn::BackendId> m_ComputeDevices;
+ bool m_Concurrent;
bool m_DequantizeOutput;
std::string m_DynamicBackendsPath;
bool m_EnableBf16TurboMode;
@@ -49,6 +50,7 @@ struct ExecuteNetworkParams
bool m_PrintIntermediate;
bool m_QuantizeInput;
bool m_SaveCachedNetwork;
+ size_t m_SimultaneousIterations;
size_t m_SubgraphId;
double m_ThresholdTime;
int m_TuningLevel;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index 286c970d72..042087e4f4 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -194,6 +194,10 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
"If left empty (the default), dynamic backends will not be used.",
cxxopts::value<std::string>(m_RuntimeOptions.m_DynamicBackendsPath))
+ ("n,concurrent",
+ "If this option is enabled inferences will be executed in parallel asynchronously.",
+ cxxopts::value<bool>(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true"))
+
("d,input-tensor-data",
"Path to files containing the input data as a flat array separated by whitespace. "
"Several paths can be passed by separating them with a comma. If not specified, the network will be "
@@ -278,7 +282,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
("D,armnn-tflite-delegate",
"Enable Arm NN TfLite delegate. "
"This option is depreciated please use tflite-executor instead",
- cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"));
+ cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"))
+
+ ("simultaneous-iterations",
+ "Number of simultaneous iterations to async-run the network for, default is set to 1",
+ cxxopts::value<size_t>(m_ExNetParams.m_SimultaneousIterations)->default_value("1"));
m_CxxOptions.add_options("c) Optimization")
("bf16-turbo-mode",
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index cab594ed48..88c704c10e 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -101,6 +101,7 @@ struct Params
std::string m_CachedNetworkFilePath;
unsigned int m_NumberOfThreads;
std::string m_MLGOTuningFilePath;
+ bool m_AsyncEnabled;
Params()
@@ -118,6 +119,7 @@ struct Params
, m_CachedNetworkFilePath("")
, m_NumberOfThreads(0)
, m_MLGOTuningFilePath("")
+ , m_AsyncEnabled(false)
{}
};
@@ -472,14 +474,14 @@ public:
optNet->SerializeToDot(file);
}
-
-
armnn::Status ret;
{
ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
const auto loading_start_time = armnn::GetTimeNow();
- ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
+ armnn::INetworkProperties networkProperties(false, false, params.m_AsyncEnabled);
+ std::string errorMessage;
+ ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);
ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
<< std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms\n";
@@ -553,7 +555,6 @@ public:
armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
MakeInputTensors(inputContainers),
MakeOutputTensors(outputContainers));
-
const auto duration = armnn::GetTimeDuration(start_time);
// if profiling is enabled print out the results
@@ -572,6 +573,63 @@ public:
}
}
+ std::tuple<armnn::profiling::ProfilingGuid, std::chrono::duration<double, std::milli>> RunAsync(
+ armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
+ const std::vector<TContainer>& inputContainers,
+ std::vector<TContainer>& outputContainers)
+ {
+ for (unsigned int i = 0; i < outputContainers.size(); ++i)
+ {
+ const unsigned int expectedOutputDataSize = GetOutputSize(i);
+
+ mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
+ {
+ const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
+ if (actualOutputDataSize < expectedOutputDataSize)
+ {
+ unsigned int outputIndex = i;
+ throw armnn::Exception(
+ fmt::format("Not enough data for output #{0}: expected "
+ "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
+ }
+ },
+ outputContainers[i]);
+ }
+
+ std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
+ if (profiler)
+ {
+ profiler->EnableProfiling(m_EnableProfiling);
+ }
+
+ // Start timer to record inference time in EnqueueWorkload (in milliseconds)
+ const auto start_time = armnn::GetTimeNow();
+
+ armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
+ MakeInputTensors(inputContainers),
+ MakeOutputTensors(outputContainers));
+ auto inferenceID = workingMemHandleRef.GetInferenceId();
+
+ const auto duration = armnn::GetTimeDuration(start_time);
+
+ // if profiling is enabled print out the results
+ if (profiler && profiler->IsProfilingEnabled())
+ {
+ profiler->Print(std::cout);
+ }
+
+ if (ret == armnn::Status::Failure)
+ {
+ throw armnn::Exception(
+ fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
+ m_NetworkIdentifier, inferenceID));
+ }
+ else
+ {
+ return std::make_tuple(inferenceID, duration);
+ }
+ }
+
const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
{
CheckInputIndexIsValid(inputIndex);
@@ -618,6 +676,11 @@ public:
return quantizationParams;
}
+ std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
+ {
+ return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
+ }
+
private:
armnn::NetworkId m_NetworkIdentifier;
std::shared_ptr<armnn::IRuntime> m_Runtime;