From a04a9d7c11f28c7e932435535e80223782f369f2 Mon Sep 17 00:00:00 2001
From: Sadik Armagan
Date: Tue, 27 Apr 2021 10:02:10 +0100
Subject: IVGCVSW-5775 'Add Async Support to ExecuteNetwork'

 * Enabled async mode with '-n, concurrent' and 'simultaneous-iterations'
   in ExecuteNetwork
 * The number of input files provided should equal the number of network
   inputs multiplied by the number of simultaneous iterations, with the
   file paths separated by commas

!armnn:5443

Signed-off-by: Sadik Armagan
Change-Id: Ibeb318010430bf4ae61a02b18b1bf88f3657774c
---
 include/armnn/IWorkingMemHandle.hpp                |   3 +
 src/armnn/WorkingMemHandle.cpp                     |   3 +-
 src/armnn/WorkingMemHandle.hpp                     |   6 +
 .../workloads/RefDetectionPostProcessWorkload.cpp  |   8 +-
 tests/ExecuteNetwork/ExecuteNetwork.cpp            | 253 ++++++++++++++-------
 tests/ExecuteNetwork/ExecuteNetworkParams.cpp      |  23 +-
 tests/ExecuteNetwork/ExecuteNetworkParams.hpp      |   2 +
 .../ExecuteNetworkProgramOptions.cpp               |  10 +-
 tests/InferenceModel.hpp                           |  71 +++++-
 9 files changed, 290 insertions(+), 89 deletions(-)

diff --git a/include/armnn/IWorkingMemHandle.hpp b/include/armnn/IWorkingMemHandle.hpp
index 171fa3d81c..6fb2f9fe5f 100644
--- a/include/armnn/IWorkingMemHandle.hpp
+++ b/include/armnn/IWorkingMemHandle.hpp
@@ -25,6 +25,9 @@ public:
     /// Returns the NetworkId of the Network that this IWorkingMemHandle works with.
     virtual NetworkId GetNetworkId() = 0;
 
+    /// Returns the InferenceId of the Inference that this IWorkingMemHandle works with.
+    virtual profiling::ProfilingGuid GetInferenceId() = 0;
+
     /// Allocate the backing memory required for execution. If this is not called, then allocation will be
     /// deferred to execution time. The mutex must be locked.
     virtual void Allocate() = 0;
diff --git a/src/armnn/WorkingMemHandle.cpp b/src/armnn/WorkingMemHandle.cpp
index 0cbef82e83..b54c5baddd 100644
--- a/src/armnn/WorkingMemHandle.cpp
+++ b/src/armnn/WorkingMemHandle.cpp
@@ -26,7 +26,8 @@ WorkingMemHandle::WorkingMemHandle(
     m_MemoryManagers(memoryManagers),
     m_OwnedTensorHandles(std::move(ownedTensorHandles)),
     m_IsAllocated(false),
-    m_Mutex()
+    m_Mutex(),
+    m_InferenceId(profiling::ProfilingService::GetNextGuid())
 {
 }
 
diff --git a/src/armnn/WorkingMemHandle.hpp b/src/armnn/WorkingMemHandle.hpp
index 92b0acaec3..5ccb2b2342 100644
--- a/src/armnn/WorkingMemHandle.hpp
+++ b/src/armnn/WorkingMemHandle.hpp
@@ -38,6 +38,11 @@ public:
         return m_NetworkId;
     }
 
+    profiling::ProfilingGuid GetInferenceId() override
+    {
+        return m_InferenceId;
+    }
+
     /// Allocate the backing memory required for execution. If this is not called, then allocation will be
     /// deferred to execution time. The mutex must be locked.
     void Allocate() override;
@@ -87,6 +92,7 @@ private:
     bool m_IsAllocated;
     std::mutex m_Mutex;
+    profiling::ProfilingGuid m_InferenceId;
 };
 
 } // end experimental namespace
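
Note (editor's illustration, not part of the patch): each working memory handle now carries its own ProfilingGuid, assigned once in the WorkingMemHandle constructor, so results of concurrent executions can be matched to the run that produced them even when they complete out of order. A minimal sketch of how a caller might read it, assuming a runtime with an already-loaded network; the helper name is hypothetical:

    // Sketch only: one working memory handle per in-flight inference.
    #include <armnn/IRuntime.hpp>
    #include <armnn/IWorkingMemHandle.hpp>
    #include <iostream>

    void PrintInferenceId(armnn::IRuntime& runtime, armnn::NetworkId networkId)
    {
        // The handle is given a unique ProfilingGuid at construction time
        // (see m_InferenceId above); GetInferenceId() simply exposes it.
        auto handle = runtime.CreateWorkingMemHandle(networkId);
        std::cout << "inference id: " << handle->GetInferenceId() << "\n";
    }
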
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
index 25c326ad37..6784e21585 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
@@ -46,10 +46,10 @@ void RefDetectionPostProcessWorkload::Execute(std::vector<ITensorHandle*> inputs
     auto scores = MakeDecoder<float>(scoresInfo, inputs[1]->Map());
     auto anchors = MakeDecoder<float>(anchorsInfo, m_Anchors->Map(false));
 
-    float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
-    float* detectionClasses = GetOutputTensorData<float>(1, m_Data);
-    float* detectionScores = GetOutputTensorData<float>(2, m_Data);
-    float* numDetections = GetOutputTensorData<float>(3, m_Data);
+    float* detectionBoxes = reinterpret_cast<float*>(outputs[0]->Map());
+    float* detectionClasses = reinterpret_cast<float*>(outputs[1]->Map());
+    float* detectionScores = reinterpret_cast<float*>(outputs[2]->Map());
+    float* numDetections = reinterpret_cast<float*>(outputs[3]->Map());
 
     DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
                          detectionBoxesInfo, detectionClassesInfo,
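
Note (editor's illustration, not part of the patch; the types below are toy stand-ins, not ArmNN classes): the change above makes the workload read its outputs from the ITensorHandle* vector handed to Execute() instead of from the queue descriptor captured in m_Data. Supplying the buffers per call is what lets one workload object serve several in-flight inferences, each with its own working memory:

    #include <cstddef>
    #include <vector>

    struct ToyWorkload
    {
        // All mutable state (the buffers) is supplied per call rather than
        // captured at construction, so independent calls do not interfere.
        void Execute(const std::vector<float>& input, std::vector<float>& output) const
        {
            for (std::size_t i = 0; i < output.size(); ++i)
            {
                output[i] = input[i] * 2.0f;   // stand-in for the real kernel
            }
        }
    };

    int main()
    {
        ToyWorkload workload;                     // one workload object...
        std::vector<float> inA{1, 2, 3}, outA(3);
        std::vector<float> inB{4, 5, 6}, outB(3);
        workload.Execute(inA, outA);              // ...driven by two independent
        workload.Execute(inB, outB);              // inferences with separate buffers
        return 0;
    }
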
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 60e4ec3401..2bbb51783c 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -279,7 +279,8 @@ int MainImpl(const ExecuteNetworkParams& params,
     using TContainer =
            mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 
-    std::vector<TContainer> inputDataContainers;
+    std::vector<std::vector<TContainer>> inputs;
+    std::vector<std::vector<TContainer>> outputs;
 
     try
     {
@@ -298,6 +299,7 @@ int MainImpl(const ExecuteNetworkParams& params,
         inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
         inferenceModelParams.m_NumberOfThreads       = params.m_NumberOfThreads;
         inferenceModelParams.m_MLGOTuningFilePath    = params.m_MLGOTuningFilePath;
+        inferenceModelParams.m_AsyncEnabled          = params.m_Concurrent;
 
         for(const std::string& inputName: params.m_InputNames)
         {
@@ -324,106 +326,201 @@ int MainImpl(const ExecuteNetworkParams& params,
                                                   runtime);
 
         const size_t numInputs = inferenceModelParams.m_InputBindings.size();
-        for(unsigned int i = 0; i < numInputs; ++i)
+
+        armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
+                                                      armnn::MakeOptional<QuantizationParams>(
+                                                          model.GetInputQuantizationParams()) :
+                                                      armnn::EmptyOptional();
+
+        for(unsigned int j = 0; j < params.m_SimultaneousIterations ; ++j)
         {
-            armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
-                                                          armnn::MakeOptional<QuantizationParams>(
-                                                              model.GetInputQuantizationParams()) :
-                                                          armnn::EmptyOptional();
-
-            armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
-                                                    armnn::EmptyOptional() :
-                                                    armnn::MakeOptional<std::string>(
-                                                        params.m_InputTensorDataFilePaths[i]);
-
-            unsigned int numElements = model.GetInputSize(i);
-            if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
+            std::vector<TContainer> inputDataContainers;
+            for(unsigned int i = 0; i < numInputs; ++i)
             {
-                // If the user has provided a tensor shape for the current input,
-                // override numElements
-                numElements = params.m_InputTensorShapes[i]->GetNumElements();
-            }
+                armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
+                                                        armnn::EmptyOptional() :
+                                                        armnn::MakeOptional<std::string>(
+                                                            params.m_InputTensorDataFilePaths[(j * numInputs) + i]);
 
-            TContainer tensorData;
-            PopulateTensorWithData(tensorData,
-                                   numElements,
-                                   params.m_InputTypes[i],
-                                   qParams,
-                                   dataFile);
+                unsigned int numElements = model.GetInputSize(i);
+                if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
+                {
+                    // If the user has provided a tensor shape for the current input,
+                    // override numElements
+                    numElements = params.m_InputTensorShapes[i]->GetNumElements();
+                }
+
+                TContainer tensorData;
+                PopulateTensorWithData(tensorData,
+                                       numElements,
+                                       params.m_InputTypes[i],
+                                       qParams,
+                                       dataFile);
 
-            inputDataContainers.push_back(tensorData);
+                inputDataContainers.push_back(tensorData);
+            }
+            inputs.push_back(inputDataContainers);
         }
 
         const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
-        std::vector<TContainer> outputDataContainers;
 
-        for (unsigned int i = 0; i < numOutputs; ++i)
+        for (unsigned int j = 0; j < params.m_SimultaneousIterations; ++j)
         {
-            if (params.m_OutputTypes[i].compare("float") == 0)
-            {
-                outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
-            }
-            else if (params.m_OutputTypes[i].compare("int") == 0)
-            {
-                outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
-            }
-            else if (params.m_OutputTypes[i].compare("qasymm8") == 0)
-            {
-                outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
-            }
-            else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
+            std::vector<TContainer> outputDataContainers;
+            for (unsigned int i = 0; i < numOutputs; ++i)
             {
-                outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
-            }
-            else
-            {
-                ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
-                return EXIT_FAILURE;
+                if (params.m_OutputTypes[i].compare("float") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
+                } else if (params.m_OutputTypes[i].compare("int") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
+                } else if (params.m_OutputTypes[i].compare("qasymm8") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
+                } else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
+                } else
+                {
+                    ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
+                    return EXIT_FAILURE;
+                }
             }
+            outputs.push_back(outputDataContainers);
         }
 
-        for (size_t x = 0; x < params.m_Iterations; x++)
+        if (!params.m_Concurrent)
         {
-            // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
-            auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
-
-            if (params.m_GenerateTensorData)
+            // Synchronous Execution
+            for (size_t x = 0; x < params.m_Iterations; x++)
             {
-                ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
-            }
+                // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
+                auto inference_duration = model.Run(inputs[0], outputs[0]);
 
-            // Print output tensors
-            const auto& infosOut = model.GetOutputBindingInfos();
-            for (size_t i = 0; i < numOutputs; i++)
-            {
-                const armnn::TensorInfo& infoOut = infosOut[i].second;
-                auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
-
-                TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
-                                      infoOut,
-                                      outputTensorFile,
-                                      params.m_DequantizeOutput);
-                mapbox::util::apply_visitor(printer, outputDataContainers[i]);
-            }
+                if (params.m_GenerateTensorData)
+                {
+                    ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
+                }
+
+                // Print output tensors
+                const auto& infosOut = model.GetOutputBindingInfos();
+                for (size_t i = 0; i < numOutputs; i++)
+                {
+                    const armnn::TensorInfo& infoOut = infosOut[i].second;
+                    auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
+
+                    TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
+                                          infoOut,
+                                          outputTensorFile,
+                                          params.m_DequantizeOutput);
+                    mapbox::util::apply_visitor(printer, outputs[0][i]);
+                }
 
-            ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
-                            << std::fixed << inference_duration.count() << " ms\n";
+                ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
+                                << std::fixed << inference_duration.count() << " ms\n";
 
-            // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
-            if (params.m_ThresholdTime != 0.0)
+                // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+                if (params.m_ThresholdTime != 0.0)
+                {
+                    ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
+                                    << std::fixed << params.m_ThresholdTime << " ms";
+                    auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
+                    ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
+                                    << std::fixed << thresholdMinusInference << " ms" << "\n";
+
+                    if (thresholdMinusInference < 0)
+                    {
+                        std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
+                        ARMNN_LOG(fatal) << errorMessage;
+                    }
+                }
+            }
+        }
+        else
+        {
+            try
             {
-                ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
-                                << std::fixed << params.m_ThresholdTime << " ms";
-                auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
-                ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
-                                << std::fixed << thresholdMinusInference << " ms" << "\n";
+                ARMNN_LOG(info) << "Asynchronous Execution... \n";
+                std::vector<std::future<std::tuple<armnn::profiling::ProfilingGuid,
+                    std::chrono::duration<double, std::milli>>>> inferenceResults;
+                inferenceResults.reserve(params.m_SimultaneousIterations);
+
+                // Create WorkingMemHandles for each inference
+                std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
+                workingMemHandles.reserve(params.m_SimultaneousIterations);
+                for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+                {
+                    workingMemHandles.push_back(model.CreateWorkingMemHandle());
+                }
+
+                // Run each inference in its own thread
+                for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+                {
+                    armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
+                    inferenceResults.push_back(std::async(
+                        std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
+                            return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i]);
+                        }
+                    ));
+                }
 
-                if (thresholdMinusInference < 0)
+                // Check the results
+                for (unsigned int j = 0; j < inferenceResults.size(); ++j)
                 {
-                    std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
-                    ARMNN_LOG(fatal) << errorMessage;
+                    // Get the results
+                    auto inferenceResult = inferenceResults[j].get();
+                    auto inference_duration = std::get<1>(inferenceResult);
+                    auto inferenceID = std::get<0>(inferenceResult);
+
+                    if (params.m_GenerateTensorData)
+                    {
+                        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
+                    }
+
+                    // Print output tensors
+                    const auto& infosOut = model.GetOutputBindingInfos();
+                    for (size_t i = 0; i < numOutputs; i++)
+                    {
+                        const armnn::TensorInfo& infoOut = infosOut[i].second;
+                        auto outputTensorFile = params.m_OutputTensorFiles.empty()
+                                                ? ""
+                                                : params.m_OutputTensorFiles[(j * numOutputs) + i];
+
+                        TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
+                                              infoOut,
+                                              outputTensorFile,
+                                              params.m_DequantizeOutput);
+                        mapbox::util::apply_visitor(printer, outputs[j][i]);
+                    }
+
+                    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
+                                    << std::fixed << inference_duration.count() << " ms\n";
+
+                    // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+                    if (params.m_ThresholdTime != 0.0)
+                    {
+                        ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
+                                        << std::fixed << params.m_ThresholdTime << " ms";
+                        auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
+                        ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
+                                        << std::fixed << thresholdMinusInference << " ms" << "\n";
+
+                        if (thresholdMinusInference < 0)
+                        {
+                            ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
+                        }
+                    }
+                    ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
+                }
             }
+            catch (const armnn::Exception& e)
+            {
+                ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
+                return EXIT_FAILURE;
+            }
+        }
     }
     catch (const armnn::Exception& e)
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
index 4e3b5e313d..8f1cb0b599 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
@@ -145,6 +145,12 @@ void ExecuteNetworkParams::ValidateParams()
     CheckModelFormat(m_ModelFormat);
 
+    // Check number of simultaneous iterations
+    if ((m_SimultaneousIterations < 1))
+    {
+        ARMNN_LOG(fatal) << "simultaneous-iterations cannot be less than 1. ";
+    }
+
     // Check input tensor shapes
     if ((m_InputTensorShapes.size() != 0) &&
         (m_InputTensorShapes.size() != m_InputNames.size()))
@@ -159,10 +165,19 @@ void ExecuteNetworkParams::ValidateParams()
             ARMNN_LOG(fatal) << "One or more input data file paths are not valid. ";
         }
 
-        if (m_InputTensorDataFilePaths.size() != m_InputNames.size())
+        if (!m_Concurrent && m_InputTensorDataFilePaths.size() != m_InputNames.size())
         {
             ARMNN_LOG(fatal) << "input-name and input-tensor-data must have the same amount of elements. ";
         }
+
+        if (m_InputTensorDataFilePaths.size() < m_SimultaneousIterations * m_InputNames.size())
+        {
+            ARMNN_LOG(fatal) << "There is not enough input data for " << m_SimultaneousIterations << " execution.";
+        }
+        if (m_InputTensorDataFilePaths.size() > m_SimultaneousIterations * m_InputNames.size())
+        {
+            ARMNN_LOG(fatal) << "There is more input data for " << m_SimultaneousIterations << " execution.";
+        }
     }
 
     if ((m_OutputTensorFiles.size() != 0) &&
         (m_OutputTensorFiles.size() != m_OutputNames.size()))
     {
         ARMNN_LOG(fatal) << "output-name and write-outputs-to-file must have the same amount of elements. ";
     }
 
+    if ((m_OutputTensorFiles.size() != 0)
+        && m_OutputTensorFiles.size() != m_SimultaneousIterations * m_OutputNames.size())
+    {
+        ARMNN_LOG(fatal) << "There is not enough output data for " << m_SimultaneousIterations << " execution.";
+    }
+
     if (m_InputTypes.size() == 0)
     {
         //Defaults the value of all inputs to "float"
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index a19eaa9346..c325df110f 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -23,6 +23,7 @@ struct ExecuteNetworkParams
     std::string              m_CachedNetworkFilePath;
     std::vector<std::string> m_ComputeDevices;
+    bool                     m_Concurrent;
     bool                     m_DequantizeOutput;
     std::string              m_DynamicBackendsPath;
     bool                     m_EnableBf16TurboMode;
@@ -49,6 +50,7 @@ struct ExecuteNetworkParams
     bool                     m_PrintIntermediate;
     bool                     m_QuantizeInput;
     bool                     m_SaveCachedNetwork;
+    size_t                   m_SimultaneousIterations;
     size_t                   m_SubgraphId;
     double                   m_ThresholdTime;
     int                      m_TuningLevel;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index 286c970d72..042087e4f4 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -194,6 +194,10 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
                  "If left empty (the default), dynamic backends will not be used.",
                  cxxopts::value<std::string>(m_RuntimeOptions.m_DynamicBackendsPath))
 
+                ("n,concurrent",
+                 "If this option is enabled inferences will be executed in parallel asynchronously.",
+                 cxxopts::value<bool>(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true"))
+
                 ("d,input-tensor-data",
                  "Path to files containing the input data as a flat array separated by whitespace. "
                  "Several paths can be passed by separating them with a comma. If not specified, the network will be "
@@ -278,7 +282,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
                 ("D,armnn-tflite-delegate",
                  "Enable Arm NN TfLite delegate. "
                  "This option is depreciated please use tflite-executor instead",
-                 cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"));
+                 cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"))
+
+                ("simultaneous-iterations",
+                 "Number of simultaneous iterations to async-run the network for, default is set to 1",
+                 cxxopts::value<size_t>(m_ExNetParams.m_SimultaneousIterations)->default_value("1"));
 
     m_CxxOptions.add_options("c) Optimization")
         ("bf16-turbo-mode",
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index cab594ed48..88c704c10e 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -101,6 +101,7 @@ struct Params
     std::string   m_CachedNetworkFilePath;
     unsigned int  m_NumberOfThreads;
     std::string   m_MLGOTuningFilePath;
+    bool          m_AsyncEnabled;
 
     Params()
@@ -118,6 +119,7 @@ struct Params
         , m_CachedNetworkFilePath("")
         , m_NumberOfThreads(0)
         , m_MLGOTuningFilePath("")
+        , m_AsyncEnabled(false)
     {}
 };
 
@@ -472,14 +474,14 @@ public:
             optNet->SerializeToDot(file);
         }
 
-
-
         armnn::Status ret;
         {
             ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
 
             const auto loading_start_time = armnn::GetTimeNow();
-            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
+            armnn::INetworkProperties networkProperties(false, false, params.m_AsyncEnabled);
+            std::string errorMessage;
+            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);
 
             ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                             << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms\n";
@@ -553,7 +555,6 @@ public:
         armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                        MakeInputTensors(inputContainers),
                                                        MakeOutputTensors(outputContainers));
-
         const auto duration = armnn::GetTimeDuration(start_time);
 
         // if profiling is enabled print out the results
@@ -572,6 +573,63 @@ public:
         }
     }
 
+    std::tuple<armnn::profiling::ProfilingGuid, std::chrono::duration<double, std::milli>> RunAsync(
+        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
+        const std::vector<TContainer>& inputContainers,
+        std::vector<TContainer>& outputContainers)
+    {
+        for (unsigned int i = 0; i < outputContainers.size(); ++i)
+        {
+            const unsigned int expectedOutputDataSize = GetOutputSize(i);
+
+            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
+            {
+                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
+                if (actualOutputDataSize < expectedOutputDataSize)
+                {
+                    unsigned int outputIndex = i;
+                    throw armnn::Exception(
+                        fmt::format("Not enough data for output #{0}: expected "
+                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
+                }
+            },
+            outputContainers[i]);
+        }
+
+        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
+        if (profiler)
+        {
+            profiler->EnableProfiling(m_EnableProfiling);
+        }
+
+        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
+        const auto start_time = armnn::GetTimeNow();
+
+        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
+                                               MakeInputTensors(inputContainers),
+                                               MakeOutputTensors(outputContainers));
+        auto inferenceID = workingMemHandleRef.GetInferenceId();
+
+        const auto duration = armnn::GetTimeDuration(start_time);
+
+        // if profiling is enabled print out the results
+        if (profiler && profiler->IsProfilingEnabled())
+        {
+            profiler->Print(std::cout);
+        }
+
+        if (ret == armnn::Status::Failure)
+        {
+            throw armnn::Exception(
+                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
+                            m_NetworkIdentifier, inferenceID));
+        }
+        else
+        {
+            return std::make_tuple(inferenceID, duration);
+        }
+    }
+
     const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
     {
         CheckInputIndexIsValid(inputIndex);
@@ -618,6 +676,11 @@ public:
         return quantizationParams;
     }
 
+    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
+    {
+        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
+    }
+
 private:
     armnn::NetworkId m_NetworkIdentifier;
     std::shared_ptr<armnn::IRuntime> m_Runtime;
-- 
cgit v1.2.1
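
Closing note (editor's sketch, not part of the patch): taken together, the InferenceModel changes expose a three-step asynchronous path — load the network with INetworkProperties requesting async execution, create one working memory handle per in-flight inference, then run through that handle. A condensed caller-side sketch against the raw IRuntime API, with the (import, export, async) argument order taken from the LoadNetwork hunk above, the helper name hypothetical, and error handling trimmed:

    #include <armnn/ArmNN.hpp>
    #include <string>
    #include <utility>

    // Loads an optimized network for async execution and runs one inference
    // through a dedicated working memory handle.
    armnn::Status LoadAndRunAsync(armnn::IRuntime& runtime,
                                  armnn::IOptimizedNetworkPtr optNet,
                                  const armnn::InputTensors& inputs,
                                  const armnn::OutputTensors& outputs)
    {
        armnn::NetworkId networkId{};
        std::string errorMessage;
        armnn::INetworkProperties networkProperties(false, false, /*asyncEnabled=*/true);

        armnn::Status status = runtime.LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);
        if (status != armnn::Status::Success)
        {
            return status;
        }

        // One handle per concurrent inference; this is the call RunAsync wraps.
        auto workingMemHandle = runtime.CreateWorkingMemHandle(networkId);
        return runtime.Execute(*workingMemHandle, inputs, outputs);
    }
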