6 #define LOG_TAG "arm-armnn-sl" 11 #include <DefaultExecution.h> 12 #include <LegacyUtils.h> 13 #include <nnapi/IBurst.h> 14 #include <nnapi/IPreparedModel.h> 15 #include <nnapi/Result.h> 16 #include <nnapi/SharedMemory.h> 17 #include <nnapi/TypeUtils.h> 18 #include <nnapi/Types.h> 19 #include <nnapi/Validation.h> 29 static const Timing g_NoTiming = {};
35 unsigned long MicrosecondsDuration(android::nn::TimePoint endPoint, android::nn::TimePoint startPoint)
37 return static_cast<unsigned long>(std::chrono::duration_cast<std::chrono::microseconds>(
38 endPoint - startPoint).count());
// Checks that the dimensions carried in an NNAPI request argument are
// consistent with the armnn::TensorInfo the network was compiled with.
// NOTE(review): interior lines of this function (the loop over 'd' and the
// return statements) are missing from this view of the file - confirm the
// full control flow against the complete source.
41 bool ValidateRequestArgument(
const Request::Argument& requestArg,
const armnn::TensorInfo& tensorInfo)
// Only compare rank/extents when the request actually supplied dimensions;
// the per-dimension checks below are guarded by this.
43 if (requestArg.dimensions.size() != 0)
47 VLOG(DRIVER) <<
"Mismatched dimensions (request argument: " 48 << requestArg.dimensions.size() <<
" expected: " << tensorInfo.
GetNumDimensions();
// A request dimension of 0 is treated as "unspecified" (skipped); otherwise
// it must equal the corresponding extent of the compiled tensor shape.
54 if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.
GetShape()[d])
56 VLOG(DRIVER) <<
"Mismatched dimensions " << d
57 <<
" (request argument: " << requestArg.dimensions[d]
58 <<
" expected: " << tensorInfo.
GetShape()[d];
// Builds an armnn::Tensor for one request argument, after validating its
// dimensions, resolving the backing memory either from a client pointer
// (LifeTime::POINTER) or from one of the request's memory pools
// (LifeTime::POOL).
// NOTE(review): the tensorInfo parameter declaration and parts of the body
// are on lines missing from this view - verify against the full source.
67 armnn::Tensor GetTensorForRequestArgument(
const Request::Argument& requestArg,
69 const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
// Reject arguments whose dimensions disagree with the compiled tensor info.
71 if (!ValidateRequestArgument(requestArg, tensorInfo))
76 if (requestArg.lifetime == Request::Argument::LifeTime::POINTER)
80 else if (requestArg.lifetime == Request::Argument::LifeTime::POOL)
// Builds a dump-file tensor name by appending a numeric index to a prefix,
// e.g. ("Input", 2) -> "Input2".
inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    std::string name{tensorNamePrefix};
    name += std::to_string(index);
    return name;
}
// Reports whether every input and output of the request uses
// LifeTime::POINTER memory (client-supplied buffers), which lets the
// driver skip the memory-pool mapping path.
// NOTE(review): the return statements are on lines missing from this view -
// confirm the early-exit values against the full source.
92 bool IsPointerTypeMemory(
const Request& request)
// Any non-POINTER input disqualifies the request from the pointer-only path.
94 for (
auto& input : request.inputs)
96 if (input.lifetime != Request::Argument::LifeTime::POINTER)
// Outputs are checked the same way as inputs.
102 for (
auto& output: request.outputs)
104 if (output.lifetime != Request::Argument::LifeTime::POINTER)
// Post-construction initialisation: propagates the GPU profiling flag to
// the runtime profiler associated with this network.
// NOTE(review): lines may be missing between the signature and this
// statement in the original file.
120 void ArmnnPreparedModel::Init()
123 m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
// Constructor for a model prepared from a regular compilation.
// m_PrepareFromCache is hard-wired to false here, so execute()/
// executeFenced() will validate incoming requests against m_Model.
// NOTE(review): the leading parameters (networkId, runtime, model, priority)
// and some member initialisers are on lines missing from this view.
129 const std::string& requestInputsAndOutputsDumpDir,
130 const bool gpuProfilingEnabled,
132 : m_NetworkId(networkId)
135 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
136 , m_GpuProfilingEnabled(gpuProfilingEnabled)
137 , m_ModelPriority(priority)
138 , m_PrepareFromCache(false)
// Constructor overload used when the model was prepared from a cached blob.
// When prepareModelFromCache is true, execute()/executeFenced() skip request
// validation against m_Model (see the '!m_PrepareFromCache' guards there).
// NOTE(review): the leading parameters and some member initialisers are on
// lines missing from this view.
145 const std::string& requestInputsAndOutputsDumpDir,
146 const bool gpuProfilingEnabled,
148 const bool prepareModelFromCache)
149 : m_NetworkId(networkId)
151 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
152 , m_GpuProfilingEnabled(gpuProfilingEnabled)
153 , m_ModelPriority(priority)
154 , m_PrepareFromCache(prepareModelFromCache)
// Converts each request input into an (index, armnn::Tensor) binding in
// 'inputs', resolving the backing memory via GetTensorForRequestArgument.
// Returns GENERAL_FAILURE on the first input that cannot be mapped,
// NONE when all inputs were bound.
160 ErrorStatus ArmnnPreparedModel::PrepareMemoryForInputs(
162 const Request& request,
163 const std::vector<android::nn::RunTimePoolInfo>& memPools)
// Reserve up front: one binding per request input, no reallocation.
const 165 inputs.reserve(request.inputs.size());
166 for (
unsigned int i = 0; i < request.inputs.size(); i++)
168 const auto& inputArg = request.inputs[i];
174 const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
178 VLOG(DRIVER) <<
// NOTE(review): this log message is missing a space before "to tensor."
// (it prints e.g. "input 0to tensor.") - string left unchanged here.
"Cannot execute request. Error converting request input " << i <<
"to tensor.";
179 return ErrorStatus::GENERAL_FAILURE;
181 inputs.emplace_back(i, inputTensor);
184 return ErrorStatus::NONE;
// Converts each request output into an (index, armnn::Tensor) binding in
// 'outputs', records the (possibly dynamic) output shapes in 'outputShapes',
// and checks that each destination buffer is large enough for the tensor.
// Returns GENERAL_FAILURE when an output cannot be mapped,
// OUTPUT_INSUFFICIENT_SIZE when a destination buffer is too small
// (with outputShapes[i].isSufficient cleared), otherwise NONE.
187 ErrorStatus ArmnnPreparedModel::PrepareMemoryForOutputs(
189 std::vector<OutputShape> &outputShapes,
190 const Request& request,
191 const std::vector<android::nn::RunTimePoolInfo>& memPools)
const 193 outputs.reserve(request.outputs.size());
194 for (
unsigned int i = 0; i < request.outputs.size(); i++)
196 auto& outputArg = request.outputs[i];
199 armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
202 VLOG(DRIVER) <<
// NOTE(review): missing space before "to tensor." in this message
// (same defect as the inputs path) - string left unchanged here.
"Cannot execute request. Error converting request output " << i <<
"to tensor.";
203 return ErrorStatus::GENERAL_FAILURE;
// Required byte size of the output, from the compiled tensor info.
206 const size_t outputSize = outputTensorInfo.
GetNumBytes();
// Overwrite the compiled shape with any dimensions the request supplied
// (dynamic output shapes); 'count' walks the shape axes in step with the
// request's dimension list.
208 unsigned int count = 0;
209 std::for_each(outputArg.dimensions.begin(), outputArg.dimensions.end(), [&](
auto dim)
213 outputTensorInfo.
GetShape()[count] = dim;
217 outputTensorInfo.
GetShape()[count] = outputArg.dimensions.size();
223 outputs.emplace_back(i, outputTensor);
// The caller-provided buffer slice must be able to hold the whole tensor.
226 if (outputArg.location.length < outputSize)
228 VLOG(DRIVER) <<
"ArmnnPreparedModel::Execute failed outputArg.location.length " 229 << std::to_string(outputArg.location.length).c_str()
230 <<
" < outputSize " << std::to_string(outputSize).c_str();
231 outputShapes[i].isSufficient =
false;
232 return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
// For pool-backed outputs also check the whole pool is big enough.
236 if (outputArg.lifetime == Request::Argument::LifeTime::POOL)
238 size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
239 if (bufferSize < outputSize)
241 VLOG(DRIVER) <<
// NOTE(review): this message claims to print "bufferSize" but streams
// outputArg.location.length instead of the bufferSize variable tested
// above - likely a copy/paste slip worth fixing upstream.
"ArmnnPreparedModel::Execute failed bufferSize " 242 << std::to_string(outputArg.location.length).c_str()
243 <<
" < outputSize " << std::to_string(outputSize).c_str();
244 outputShapes[i].isSufficient =
false;
245 return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
249 return ErrorStatus::NONE;
// Maps the request's memory pools (unless the request uses only POINTER
// memory) and prepares both input and output tensor bindings.
// Returns INVALID_ARGUMENT when pool mapping fails, GENERAL_FAILURE when
// input preparation or an exception occurs, the outputs' status when output
// preparation fails, otherwise NONE.
// NOTE(review): the function signature's first parameters and the opening
// of the try block are on lines missing from this view.
254 std::vector<android::nn::RunTimePoolInfo>& memPools,
255 const Request& request,
256 const bool pointerMemory)
// Pool mapping is skipped entirely for pointer-only requests.
const 262 if (!pointerMemory && !setRunTimePoolInfosFromMemoryPools(&memPools, request.pools))
264 return ErrorStatus::INVALID_ARGUMENT;
267 if (PrepareMemoryForInputs(inputs, request, memPools) != ErrorStatus::NONE)
269 VLOG(DRIVER) <<
"Failed when preparing memory for Inputs";
270 return ErrorStatus::GENERAL_FAILURE;
273 std::vector<OutputShape> outputShapes(request.outputs.size());
275 auto errorStatus = PrepareMemoryForOutputs(outputs, outputShapes, request, memPools);
276 if (errorStatus != ErrorStatus::NONE)
// armnn exceptions and std::exception are both translated to
// GENERAL_FAILURE rather than being allowed to escape the driver.
283 VLOG(DRIVER) <<
"armnn::Exception caught while preparing for EnqueueWorkload: " << e.
what();
284 return ErrorStatus::GENERAL_FAILURE;
286 catch (std::exception& e)
288 VLOG(DRIVER) <<
"std::exception caught while preparing for EnqueueWorkload: " << e.what();
289 return ErrorStatus::GENERAL_FAILURE;
292 return ErrorStatus::NONE;
// Synchronous execution entry point (IPreparedModel::execute override):
// validates the request (unless prepared from cache), checks the deadline,
// prepares I/O memory, runs the graph, and returns the output shapes plus
// timing information.
// NOTE(review): the function signature's opening line and several body
// lines (timing setup, PrepareMemoryForIO's middle arguments, the
// outputShapes fill loop body) are missing from this view.
296 const Request& request,
297 MeasureTiming measureTiming,
298 const OptionalTimePoint& deadline,
299 const OptionalDuration&,
300 const std::vector<android::nn::TokenValuePair>& hints,
301 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const 303 VLOG(DRIVER) <<
"CanonicalDriver::PreparedModel::execute()";
306 if (measureTiming == MeasureTiming::YES)
// Cache-prepared models carry no Model to validate against, so the
// request check is skipped for them.
312 if (!m_PrepareFromCache)
314 const auto modelRequest = validateRequestForModel(request, m_Model);
315 if (!modelRequest.ok())
317 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
319 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(): " <<
GetModelSummary(m_Model).c_str();
321 if (hasDeadlinePassed(deadline))
323 return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
// Heap-allocate pools/tensors in shared_ptrs so they outlive this call if
// needed by the execution machinery.
328 auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
331 auto inputTensors = std::make_shared<armnn::InputTensors>();
332 auto outputTensors = std::make_shared<armnn::OutputTensors>();
334 auto isPointerTypeMemory = IsPointerTypeMemory(request);
335 ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
339 isPointerTypeMemory);
// Map internal preparation status onto the canonical NN_ERROR results.
341 switch(theErrorStatus)
343 case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
344 return NN_ERROR(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
345 case ErrorStatus::GENERAL_FAILURE:
346 return NN_ERROR(ErrorStatus::GENERAL_FAILURE);
347 case ErrorStatus::INVALID_ARGUMENT:
348 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
353 std::vector<OutputShape> outputShapes(outputTensors->size());
354 for (
unsigned int i = 0; i < outputTensors->size(); i++)
// NOTE(review): this copies the (int, Tensor) pair by value each
// iteration; a const reference would avoid the copy.
356 std::pair<int, armnn::Tensor> outputTensorPair = (*outputTensors)[i];
364 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(...) before ExecuteGraph";
365 auto errorStatus =
ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
366 if (errorStatus != ErrorStatus::NONE)
368 return NN_ERROR(errorStatus) <<
"execute() failed";
370 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(...) after ExecuteGraph";
372 return std::make_pair(outputShapes, theTiming);
// Runs the prepared network: optionally dumps input tensors, imports
// input/output memory where possible (removing imported bindings from the
// enqueue lists), enqueues the workload, commits non-pointer memory pools,
// and optionally dumps outputs and logs timing.
// NOTE(review): many interior lines (the import calls, the erase/remove_if
// wrappers, the EnqueueWorkload call and its status check, timing capture)
// are missing from this view.
376 std::shared_ptr<std::vector<android::nn::RunTimePoolInfo>>& pMemPools,
380 const bool pointerMemory)
const 382 VLOG(DRIVER) <<
"ArmnnPreparedModel::ExecuteGraph(...)";
384 DumpTensorsIfRequired(
"Input", inputTensors);
385 std::vector<armnn::ImportedInputId> importedInputIds;
386 std::vector<armnn::ImportedOutputId> importedOutputIds;
394 VLOG(DRIVER) <<
"ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false";
// Bindings whose memory was successfully imported are filtered out of the
// tensor lists so EnqueueWorkload does not bind them a second time.
396 if (!importedInputIds.empty())
404 inputTensors.begin(), inputTensors.end(),
405 [&importedId](std::pair<armnn::LayerBindingId, class armnn::ConstTensor>& element) {
406 return (element.first == static_cast<int>(importedId));
412 if (!importedOutputIds.empty())
420 outputTensors.begin(), outputTensors.end(),
421 [&importedId](std::pair<armnn::LayerBindingId, class armnn::Tensor>& element) {
422 return (element.first == static_cast<int>(importedId));
424 outputTensors.end());
439 VLOG(DRIVER) <<
"ArmnnPreparedModel:ExecuteGraph EnqueueWorkload failed";
440 return ErrorStatus::GENERAL_FAILURE;
// All exceptions from execution are converted to GENERAL_FAILURE.
445 VLOG(DRIVER) <<
"armnn:Exception caught from EnqueueWorkload: " << e.
what();
446 return ErrorStatus::GENERAL_FAILURE;
448 catch (std::exception& e)
450 VLOG(DRIVER) <<
"std::exception caught from EnqueueWorkload: " << e.what();
451 return ErrorStatus::GENERAL_FAILURE;
// Pool-backed outputs must be committed back to shared memory; skipped
// when client pointers were used or when everything was imported.
454 if (!pointerMemory && (!importedInputIds.empty() || !importedOutputIds.empty()))
458 DumpTensorsIfRequired(
"Output", outputTensors);
466 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute timing - Device = " 467 << timing.timeOnDevice <<
"Driver = " << timing.timeInDriver;
469 return ErrorStatus::NONE;
// Accessor returning the priority this model was prepared with.
// NOTE(review): the function signature line is missing from this view.
474 return m_ModelPriority;
// Fenced execution entry point (IPreparedModel::executeFenced override):
// validates the request, waits on all supplied sync fences, prepares I/O
// memory, runs the graph synchronously, and returns an already-signaled
// fence plus a callback that reports timing/status.
// NOTE(review): the signature's opening line and several body lines
// (timing capture, PrepareMemoryForIO's middle arguments, the callback's
// switch header and return) are missing from this view.
479 const Request& request,
480 const std::vector<SyncFence>& waitFor,
481 MeasureTiming measureTiming,
482 const OptionalTimePoint& deadline,
483 const OptionalDuration&,
484 const OptionalDuration&,
485 const std::vector<android::nn::TokenValuePair>& hints,
486 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const 488 VLOG(DRIVER) <<
"ArmnnPreparedModel::executeFenced()";
// Cache-prepared models have no Model to validate the request against.
490 if (!m_PrepareFromCache) {
491 const auto modelRequest = validateRequestForModel(request, m_Model);
492 if (!modelRequest.ok())
494 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
496 VLOG(DRIVER) <<
"ArmnnPreparedModel::executeFenced(): " <<
GetModelSummary(m_Model).c_str();
498 if (hasDeadlinePassed(deadline))
500 return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
504 if (measureTiming == MeasureTiming::YES)
// Every wait fence must carry a valid handle and signal before execution
// may start; this implementation blocks until each fence signals.
511 for (
const auto& syncFence : waitFor)
513 if (!syncFence.getSharedHandle())
515 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
517 if (syncFence.syncWait({}) != SyncFence::FenceState::SIGNALED)
519 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) <<
"syncWait failed";
523 android::nn::TimePoint fenceExecutionStart;
524 if (measureTiming == MeasureTiming::YES)
526 fenceExecutionStart = Clock::now();
531 auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
534 auto inputTensors = std::make_shared<armnn::InputTensors>();
535 auto outputTensors = std::make_shared<armnn::OutputTensors>();
537 auto isPointerTypeMemory = IsPointerTypeMemory(request);
538 ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
542 isPointerTypeMemory);
// NOTE(review): every non-NONE preparation status is reported as
// INVALID_ARGUMENT here, unlike execute() which maps each status
// individually - confirm this collapsing is intentional.
544 if (theErrorStatus != ErrorStatus::NONE)
546 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) <<
"executeFenced() failed";
549 Timing timingSinceLaunch = {};
550 Timing timingAfterFence = {};
551 if (measureTiming == MeasureTiming::YES)
554 timingAfterFence.timeInDriver = ctx.
driverEnd - fenceExecutionStart;
555 VLOG(DRIVER) <<
// NOTE(review): this message is labelled "timingSinceLaunch" but streams
// timingAfterFence.timeOnDevice - looks like a copy/paste slip; verify.
"executeFenced timingSinceLaunch = " << timingAfterFence.timeOnDevice;
556 VLOG(DRIVER) <<
"executeFenced timingAfterFence = " << timingAfterFence.timeInDriver;
559 VLOG(DRIVER) <<
"ArmnnCanonicalPreparedModel::executeFenced(...) before ExecuteGraph";
560 auto errorStatus =
ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
561 VLOG(DRIVER) <<
"ArmnnCanonicalPreparedModel::executeFenced(...) after ExecuteGraph";
// The callback captures status and timings by value so it stays valid
// after this frame returns.
563 ExecuteFencedInfoCallback armnnFencedExecutionCallback =
564 [timingSinceLaunch, timingAfterFence, errorStatus]() {
566 GeneralResult<std::pair<Timing, Timing>> result;
// NOTE(review): the consecutive original line numbers (570-575) leave no
// room for break statements between these cases, so they appear to fall
// through - each earlier error code would be overwritten by the later
// ones. Confirm against the full source and add breaks if missing.
570 case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
571 result.error().code = (ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
572 case ErrorStatus::GENERAL_FAILURE:
573 result.error().code = (ErrorStatus::GENERAL_FAILURE);
574 case ErrorStatus::INVALID_ARGUMENT:
575 result.error().code = (ErrorStatus::INVALID_ARGUMENT);
578 result.value() = std::make_pair(timingSinceLaunch, timingAfterFence);
// Execution already completed above, so the returned fence is signaled.
583 return std::make_pair(SyncFence::createAsSignaled(), std::move(armnnFencedExecutionCallback ));
// Creates a reusable execution object; delegates to the canonical
// DefaultExecution wrapper holding a shared_ptr back to this model.
// NOTE(review): the signature's opening line and the middle constructor
// arguments are on lines missing from this view.
587 const Request& request,
588 MeasureTiming measureTiming,
589 const OptionalDuration& loopTimeoutDuration,
590 const std::vector<android::nn::TokenValuePair>& hints,
591 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const 593 VLOG(DRIVER) <<
"ArmnnPreparedModel::createReusableExecution()";
594 return std::make_shared<DefaultExecution>(shared_from_this(),
597 loopTimeoutDuration);
// Dumps every tensor in the binding collection to the configured dump
// directory (no-op when m_RequestInputsAndOutputsDumpDir is empty).
// tensorNamePrefix distinguishes input vs output dumps; the request file
// name is derived from the network id ("<networkId>.dump").
// NOTE(review): the DumpTensor call's leading arguments are on lines
// missing from this view.
611 template<
typename TensorBindingCollection>
612 void ArmnnPreparedModel::DumpTensorsIfRequired(
char const* tensorNamePrefix,
613 const TensorBindingCollection& tensorBindings)
const 615 if (!m_RequestInputsAndOutputsDumpDir.empty())
617 const std::string requestName = std::to_string(m_NetworkId) +
".dump";
618 for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
622 BuildTensorName(tensorNamePrefix, i),
623 tensorBindings[i].second);
// Destructor: when GPU profiling is enabled, fetches the network's profiler
// so the profiling JSON can be dumped to the configured directory before
// the network is unloaded.
// NOTE(review): the destructor's signature, the DumpJsonProfilingIfRequired
// call's remaining arguments, and the UnloadNetwork call are on lines
// missing from this view.
630 VLOG(DRIVER) <<
"ArmnnPreparedModel::~ArmnnPreparedModel()";
632 if (m_GpuProfilingEnabled)
634 auto profiler = m_Runtime->
GetProfiler(m_NetworkId);
639 m_RequestInputsAndOutputsDumpDir,
// Runs the network once with driver-allocated dummy buffers (used e.g. to
// warm up / verify a cached preparation). Allocates one byte vector per
// input and output sized from the compiled tensor infos, binds them, runs
// the graph, and reports success as a bool.
// NOTE(review): the signature line, the tensor-info lookups, the input
// tensor construction, and the ExecuteGraph call are on lines missing from
// this view. 'storage' owns every buffer, so the raw data() pointers handed
// to the tensors stay valid for the whole call.
650 std::vector<std::vector<char>> storage;
652 for (
unsigned int i = 0; i < numInputs; i++)
658 storage.emplace_back(inputTensorInfo.
GetNumBytes());
661 inputTensors.emplace_back(i, inputTensor);
665 for (
unsigned int i = 0; i < numOutputs; i++)
668 storage.emplace_back(outputTensorInfo.
GetNumBytes());
// The output tensor points straight at the freshly allocated backing store.
669 const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());
671 outputTensors.emplace_back(i, outputTensor);
675 auto memPools = std::make_shared<std::vector<::android::nn::RunTimePoolInfo>>();
682 return errorStatus == ErrorStatus::NONE;
::android::nn::MeasureTiming measureTimings
const std::shared_ptr< IProfiler > GetProfiler(NetworkId networkId) const
Gets the profiler corresponding to the given network id.
const TensorShape & GetShape() const
GeneralResult< SharedBurst > configureExecutionBurst() const override
GeneralResult< std::pair< SyncFence, ExecuteFencedInfoCallback > > executeFenced(const Request &request, const std::vector< SyncFence > &waitFor, MeasureTiming measureTiming, const OptionalTimePoint &deadline, const OptionalDuration &loopTimeoutDuration, const OptionalDuration &timeoutDurationAfterFence, const std::vector< android::nn::TokenValuePair > &hints, const std::vector< android::nn::ExtensionNameAndPrefix > &extensionNameToPrefix) const override
::android::nn::ErrorStatus ErrorStatus
android::nn::TimePoint deviceStart
ErrorStatus ExecuteGraph(std::shared_ptr< std::vector< android::nn::RunTimePoolInfo >> &pMemPools, armnn::InputTensors &inputTensors, armnn::OutputTensors &outputTensors, CanonicalExecutionContext callback, const bool pointerMemory=false) const
Executes the graph prepared from the request.
void * GetMemoryFromPool(DataLocation location, const std::vector< android::nn::RunTimePoolInfo > &memPools)
Returns a pointer to a specific location in a pool.
unsigned int GetNumBytes() const
OutputShape ComputeShape(const armnn::TensorInfo &info)
TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const
virtual const char * what() const noexcept override
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
std::string GetModelSummary(const Model &model)
MemoryType GetMemoryArea() const
void DumpTensor(const std::string &dumpDir, const std::string &requestName, const std::string &tensorName, const TensorType &tensor)
bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs) const
Executes this model with dummy inputs (e.g. all zeroes).
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
::android::nn::Model Model
Helper classes.
android::nn::TimePoint driverStart
Priority GetModelPriority() const
void * GetMemoryFromPointer(const Request::Argument &requestArg)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Status UnloadNetwork(NetworkId networkId)
Unloads a network from the IRuntime.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Status EnqueueWorkload(NetworkId networkId, const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Evaluates a network using input in inputTensors and outputs filled into outputTensors.
virtual ~ArmnnPreparedModel()
const TensorInfo & GetInfo() const
unsigned int ImportedInputId
ArmnnPreparedModel(armnn::NetworkId networkId, armnn::IRuntime *runtime, const Model &model, const std::string &requestInputsAndOutputsDumpDir, const bool gpuProfilingEnabled, Priority priority=Priority::MEDIUM)
std::vector< ImportedInputId > ImportInputs(NetworkId networkId, const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
ImportInputs separates the importing and mapping of InputTensors from network execution.
TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const
android::nn::TimePoint deviceEnd
ExecutionResult< std::pair< std::vector< OutputShape >, Timing > > execute(const Request &request, MeasureTiming measureTiming, const OptionalTimePoint &deadline, const OptionalDuration &loopTimeoutDuration, const std::vector< android::nn::TokenValuePair > &hints, const std::vector< android::nn::ExtensionNameAndPrefix > &extensionNameToPrefix) const override
Base class for all ArmNN exceptions so that users can filter to just those.
android::nn::TimePoint driverEnd
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
std::any getUnderlyingResource() const override
std::vector< ImportedOutputId > ImportOutputs(NetworkId networkId, const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
ImportOutputs separates the importing and mapping of OutputTensors from network execution.
GeneralResult< android::nn::SharedExecution > createReusableExecution(const Request &request, MeasureTiming measureTiming, const OptionalDuration &loopTimeoutDuration, const std::vector< android::nn::TokenValuePair > &hints, const std::vector< android::nn::ExtensionNameAndPrefix > &extensionNameToPrefix) const override
void CommitPools(std::vector<::android::nn::RunTimePoolInfo > &memPools)
unsigned int GetNumDimensions() const
void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled, const std::string &dumpDir, armnn::NetworkId networkId, const armnn::IProfiler *profiler)