6 #define LOG_TAG "arm-armnn-sl"
11 #include <DefaultExecution.h>
12 #include <LegacyUtils.h>
13 #include <nnapi/IBurst.h>
14 #include <nnapi/IPreparedModel.h>
15 #include <nnapi/Result.h>
16 #include <nnapi/SharedMemory.h>
17 #include <nnapi/TypeUtils.h>
18 #include <nnapi/Types.h>
19 #include <nnapi/Validation.h>
// Zero-initialised Timing sentinel returned when no measurement was requested/taken.
29 static const Timing g_NoTiming = {};
35 unsigned long MicrosecondsDuration(android::nn::TimePoint endPoint, android::nn::TimePoint startPoint)
37 return static_cast<unsigned long>(std::chrono::duration_cast<std::chrono::microseconds>(
38 endPoint - startPoint).count());
// Checks that the dimensions supplied in a request argument are compatible
// with the tensor info derived from the prepared model: when the request
// specifies dimensions, each non-zero entry must match the model's shape.
// NOTE(review): this chunk is a fragment — the dimension loop header (index
// `d`), braces, and return statements are not visible here; presumably the
// function returns false on each logged mismatch and true otherwise. Confirm
// against the full source.
41 bool ValidateRequestArgument(
const Request::Argument& requestArg,
const armnn::TensorInfo& tensorInfo)
// A non-empty dimension list in the request must agree with the model.
43 if (requestArg.dimensions.size() != 0)
// Rank mismatch between the request argument and the tensor info.
47 VLOG(DRIVER) <<
"Mismatched dimensions (request argument: "
48 << requestArg.dimensions.size() <<
" expected: " << tensorInfo.
GetNumDimensions();
// A request dimension of 0 means "unspecified" and is accepted;
// any other value must equal the model's shape at the same index.
54 if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.
GetShape()[d])
56 VLOG(DRIVER) <<
"Mismatched dimensions " << d
57 <<
" (request argument: " << requestArg.dimensions[d]
58 <<
" expected: " << tensorInfo.
GetShape()[d];
// Builds an armnn::Tensor for a request argument, resolving its backing
// memory either from a raw pointer (LifeTime::POINTER) or from one of the
// request's memory pools (LifeTime::POOL), after validating the argument's
// dimensions against the expected tensor info.
// NOTE(review): fragment — a `tensorInfo` parameter (referenced at orig-71)
// and the branch bodies/returns are not visible in this chunk; presumably an
// empty/null tensor is returned on validation failure. Confirm against the
// full source.
67 armnn::Tensor GetTensorForRequestArgument(
const Request::Argument& requestArg,
69 const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
// Reject arguments whose dimensions do not match the model's tensor info.
71 if (!ValidateRequestArgument(requestArg, tensorInfo))
// POINTER lifetime: the buffer address is carried directly in the argument.
76 if (requestArg.lifetime == Request::Argument::LifeTime::POINTER)
// POOL lifetime: the buffer lives inside requestPools at location.poolIndex.
80 else if (requestArg.lifetime == Request::Argument::LifeTime::POOL)
/// Builds a dump-file tensor name of the form "<prefix><index>",
/// e.g. BuildTensorName("Input", 2) == "Input2".
///
/// @param tensorNamePrefix null-terminated prefix string (must not be null)
/// @param index            tensor position appended in decimal
/// @return the concatenated name
///
/// NOTE(review): the original chunk was missing the function braces; this
/// reconstruction preserves the single visible return statement verbatim.
inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    return tensorNamePrefix + std::to_string(index);
}
// Reports whether every input and output of the request uses
// LifeTime::POINTER memory (i.e. buffers passed by raw pointer rather than
// through memory pools). The result selects the pointer-memory fast path in
// PrepareMemoryForIO/ExecuteGraph.
// NOTE(review): fragment — the branch bodies and final return are not
// visible; presumably each non-POINTER argument returns false and the
// function returns true when the loops complete. Confirm against the full
// source.
92 bool IsPointerTypeMemory(
const Request& request)
94 for (
auto& input : request.inputs)
96 if (input.lifetime != Request::Argument::LifeTime::POINTER)
102 for (
auto& output: request.outputs)
104 if (output.lifetime != Request::Argument::LifeTime::POINTER)
// Post-construction initialisation: propagates the GPU profiling flag to the
// Arm NN runtime profiler associated with this network.
120 void ArmnnPreparedModel::Init()
123 m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
// Constructor fragment (signature head and some member initialisers are not
// visible in this chunk). Captures the network id, dump directory, profiling
// flag, and model priority; m_PrepareFromCache is fixed to false for this
// overload — the cached-model case uses the second constructor below.
129 const std::string& requestInputsAndOutputsDumpDir,
130 const bool gpuProfilingEnabled,
132 : m_NetworkId(networkId)
135 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
136 , m_GpuProfilingEnabled(gpuProfilingEnabled)
137 , m_ModelPriority(priority)
138 , m_PrepareFromCache(false)
// Constructor fragment for the prepare-from-cache path. Identical member
// capture to the first overload, except m_PrepareFromCache comes from the
// caller; when true, execute()/executeFenced() skip request-vs-model
// validation (no Model is available for a cache-restored network).
145 const std::string& requestInputsAndOutputsDumpDir,
146 const bool gpuProfilingEnabled,
148 const bool prepareModelFromCache)
149 : m_NetworkId(networkId)
151 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
152 , m_GpuProfilingEnabled(gpuProfilingEnabled)
153 , m_ModelPriority(priority)
154 , m_PrepareFromCache(prepareModelFromCache)
// Converts each request input into an (index, armnn::Tensor) binding and
// appends it to `inputs` (the first parameter, not visible in this fragment —
// presumably armnn::InputTensors&). Returns GENERAL_FAILURE if any argument
// cannot be mapped to a tensor, NONE otherwise.
// NOTE(review): fragment — the derivation of `inputTensorInfo` (orig-169..173)
// and the failure test guarding the log (orig-175..177) are not visible here.
160 ErrorStatus ArmnnPreparedModel::PrepareMemoryForInputs(
162 const Request& request,
163 const std::vector<android::nn::RunTimePoolInfo>& memPools)
const
// One binding per request input; reserve up front to avoid reallocation.
165 inputs.reserve(request.inputs.size());
166 for (
unsigned int i = 0; i < request.inputs.size(); i++)
168 const auto& inputArg = request.inputs[i];
// Resolve the argument's backing memory (pointer or pool) into a tensor.
174 const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
178 VLOG(DRIVER) <<
"Cannot execute request. Error converting request input " << i <<
"to tensor.";
179 return ErrorStatus::GENERAL_FAILURE;
181 inputs.emplace_back(i, inputTensor);
184 return ErrorStatus::NONE;
// Converts each request output into an (index, armnn::Tensor) binding,
// records the resulting shape in `outputShapes`, and verifies that the
// destination buffer is large enough. Returns:
//   - GENERAL_FAILURE if an argument cannot be mapped to a tensor,
//   - OUTPUT_INSUFFICIENT_SIZE (with isSufficient=false) if the output
//     location or pool is smaller than the tensor's byte size,
//   - NONE on success.
// NOTE(review): fragment — the `outputs` parameter line, the derivation of
// `outputTensorInfo`, and several brace/condition lines are not visible here.
187 ErrorStatus ArmnnPreparedModel::PrepareMemoryForOutputs(
189 std::vector<OutputShape> &outputShapes,
190 const Request& request,
191 const std::vector<android::nn::RunTimePoolInfo>& memPools)
const
193 outputs.reserve(request.outputs.size());
194 for (
unsigned int i = 0; i < request.outputs.size(); i++)
196 auto& outputArg = request.outputs[i];
199 armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
202 VLOG(DRIVER) <<
"Cannot execute request. Error converting request output " << i <<
"to tensor.";
203 return ErrorStatus::GENERAL_FAILURE;
// Byte size implied by the model's (possibly updated) output shape.
206 const size_t outputSize = outputTensorInfo.
GetNumBytes();
// Overwrite the tensor-info shape with the dimensions the request supplied
// (dynamic output shapes). NOTE(review): `count` appears to index the shape
// but its increment is not visible in this fragment — confirm.
208 unsigned int count = 0;
209 std::for_each(outputArg.dimensions.begin(), outputArg.dimensions.end(), [&](
auto dim)
213 outputTensorInfo.GetShape()[count] = dim;
217 outputTensorInfo.GetShape()[count] = outputArg.dimensions.size();
223 outputs.emplace_back(i, outputTensor);
// The caller-provided output location must hold the full tensor.
226 if (outputArg.location.length < outputSize)
228 VLOG(DRIVER) <<
"ArmnnPreparedModel::Execute failed outputArg.location.length "
229 << std::to_string(outputArg.location.length).c_str()
230 <<
" < outputSize " << std::to_string(outputSize).c_str()
231 outputShapes[i].isSufficient =
false;
232 return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
// For pool-backed outputs, the whole pool must also be large enough.
236 if (outputArg.lifetime == Request::Argument::LifeTime::POOL)
238 size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
239 if (bufferSize < outputSize)
241 VLOG(DRIVER) <<
"ArmnnPreparedModel::Execute failed bufferSize "
// NOTE(review): BUG — this message claims to print bufferSize but streams
// outputArg.location.length (copy-paste from the branch above); should
// presumably be std::to_string(bufferSize).
242 << std::to_string(outputArg.location.length).c_str()
243 <<
" < outputSize " << std::to_string(outputSize).c_str();
244 outputShapes[i].isSufficient =
false;
245 return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
249 return ErrorStatus::NONE;
// Orchestrates memory preparation for one execution: populates `memPools`
// from the request's memory pools (skipped when all arguments are raw
// pointers), then delegates to PrepareMemoryForInputs and
// PrepareMemoryForOutputs. Any armnn::Exception / std::exception raised while
// preparing is caught, logged, and reported as GENERAL_FAILURE.
// NOTE(review): fragment — the signature head (inputs/outputs parameters),
// the try block opening, and the handling of PrepareMemoryForOutputs'
// non-NONE status (orig-277..281, presumably propagating errorStatus) are
// not visible here.
254 std::vector<android::nn::RunTimePoolInfo>& memPools,
255 const Request& request,
256 const bool pointerMemory)
const
// Pool mapping is only needed when at least one argument is pool-backed.
262 if (!pointerMemory && !setRunTimePoolInfosFromMemoryPools(&memPools, request.pools))
264 return ErrorStatus::INVALID_ARGUMENT;
267 if (PrepareMemoryForInputs(inputs, request, memPools) != ErrorStatus::NONE)
269 VLOG(DRIVER) <<
"Failed when preparing memory for Inputs";
270 return ErrorStatus::GENERAL_FAILURE;
273 std::vector<OutputShape> outputShapes(request.outputs.size());
275 auto errorStatus = PrepareMemoryForOutputs(outputs, outputShapes, request, memPools);
276 if (errorStatus != ErrorStatus::NONE)
283 VLOG(DRIVER) <<
"armnn::Exception caught while preparing for EnqueueWorkload: " << e.
what();
284 return ErrorStatus::GENERAL_FAILURE;
286 catch (std::exception& e)
288 VLOG(DRIVER) <<
"std::exception caught while preparing for EnqueueWorkload: " << e.what();
289 return ErrorStatus::GENERAL_FAILURE;
292 return ErrorStatus::NONE;
// Synchronous execution entry point (NNAPI canonical IPreparedModel::execute).
// Flow: optionally start driver-side timing; validate the request against the
// model (skipped when prepared from cache); honour the deadline; prepare
// input/output tensors and memory pools; run ExecuteGraph; return the output
// shapes plus timing.
// NOTE(review): fragment — the method name/return-type line, the timing
// setup under MeasureTiming::YES (orig-306..311), the middle arguments of the
// PrepareMemoryForIO call (orig-336..338), the outputShapes population inside
// the loop (orig-357..362), and the origin of `ctx`/`theTiming` are not
// visible here. `hints` and `extensionNameToPrefix` appear unused in the
// visible lines.
296 const Request& request,
297 MeasureTiming measureTiming,
298 const OptionalTimePoint& deadline,
299 const OptionalDuration&,
300 const std::vector<android::nn::TokenValuePair>& hints,
301 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const
303 VLOG(DRIVER) <<
"CanonicalDriver::PreparedModel::execute()";
306 if (measureTiming == MeasureTiming::YES)
// Cache-restored networks have no Model to validate against.
312 if (!m_PrepareFromCache)
314 const auto modelRequest = validateRequestForModel(request, m_Model);
315 if (!modelRequest.ok())
317 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
319 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(): " <<
GetModelSummary(m_Model).c_str();
321 if (hasDeadlinePassed(deadline))
323 return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
// Heap-allocated so the pools/tensors can outlive this frame if shared
// with the execution path.
328 auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
331 auto inputTensors = std::make_shared<armnn::InputTensors>();
332 auto outputTensors = std::make_shared<armnn::OutputTensors>();
334 auto isPointerTypeMemory = IsPointerTypeMemory(request);
335 ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
339 isPointerTypeMemory);
// Map preparation failures onto the canonical NN_ERROR results.
341 switch(theErrorStatus)
343 case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
344 return NN_ERROR(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
345 case ErrorStatus::GENERAL_FAILURE:
346 return NN_ERROR(ErrorStatus::GENERAL_FAILURE);
347 case ErrorStatus::INVALID_ARGUMENT:
348 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
// Report one OutputShape per prepared output tensor.
353 std::vector<OutputShape> outputShapes(outputTensors->size());
354 for (
unsigned int i = 0; i < outputTensors->size(); i++)
356 std::pair<int, armnn::Tensor> outputTensorPair = (*outputTensors)[i];
364 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(...) before ExecuteGraph";
365 auto errorStatus =
ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
366 if (errorStatus != ErrorStatus::NONE)
368 return NN_ERROR(errorStatus) <<
"execute() failed";
370 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(...) after ExecuteGraph";
372 return std::make_pair(outputShapes, theTiming);
// Runs the prepared Arm NN network: optionally dumps input tensors, imports
// input/output buffers into the runtime where possible, enqueues the
// workload, strips successfully-imported tensors from the enqueue lists
// (they are bound by import id instead), and dumps outputs. armnn and std
// exceptions from EnqueueWorkload are caught, logged, and mapped to
// GENERAL_FAILURE.
// NOTE(review): fragment — the signature head (tensors and CallbackContext
// `ctx` parameters), the import calls that fill importedInputIds /
// importedOutputIds (orig-387..393), the EnqueueWorkload call and its status
// check (orig-425..438), the commit loop over *pMemPools (orig-455..457),
// and the computation of `timing` (orig-459..465) are not visible here.
376 std::shared_ptr<std::vector<android::nn::RunTimePoolInfo>>& pMemPools,
380 const bool pointerMemory)
const
382 VLOG(DRIVER) <<
"ArmnnPreparedModel::ExecuteGraph(...)";
384 DumpTensorsIfRequired(
"Input", inputTensors);
385 std::vector<armnn::ImportedInputId> importedInputIds;
386 std::vector<armnn::ImportedOutputId> importedOutputIds;
394 VLOG(DRIVER) <<
"ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false";
// Remove imported inputs from the enqueue list: the runtime already owns
// those bindings via their import ids.
396 if (!importedInputIds.empty())
404 inputTensors.begin(), inputTensors.end(),
405 [&importedId](std::pair<armnn::LayerBindingId, class armnn::ConstTensor>& element) {
406 return (element.first == static_cast<int>(importedId));
// Same pruning for imported outputs.
412 if (!importedOutputIds.empty())
420 outputTensors.begin(), outputTensors.end(),
421 [&importedId](std::pair<armnn::LayerBindingId, class armnn::Tensor>& element) {
422 return (element.first == static_cast<int>(importedId));
424 outputTensors.end());
439 VLOG(DRIVER) <<
"ArmnnPreparedModel:ExecuteGraph EnqueueWorkload failed";
440 return ErrorStatus::GENERAL_FAILURE;
445 VLOG(DRIVER) <<
"armnn:Exception caught from EnqueueWorkload: " << e.
what();
446 return ErrorStatus::GENERAL_FAILURE;
448 catch (std::exception& e)
450 VLOG(DRIVER) <<
"std::exception caught from EnqueueWorkload: " << e.what();
451 return ErrorStatus::GENERAL_FAILURE;
// Pool-backed executions must flush results back to shared memory.
454 if (!pointerMemory && (!importedInputIds.empty() || !importedOutputIds.empty()))
458 DumpTensorsIfRequired(
"Output", outputTensors);
// NOTE(review): log message lacks a separator before "Driver = ", so the
// device time and the label run together in the output.
466 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute timing - Device = "
467 << timing.timeOnDevice <<
"Driver = " << timing.timeInDriver;
469 return ErrorStatus::NONE;
// Fragment of ArmnnPreparedModel::GetModelPriority() — the signature line is
// not visible in this chunk; returns the priority captured at construction.
474 return m_ModelPriority;
// Fenced execution entry point (NNAPI canonical executeFenced). Waits on all
// supplied sync fences, prepares memory, runs ExecuteGraph synchronously,
// and returns an already-signaled fence plus a callback that reports the
// execution status and timing when queried.
// NOTE(review): fragment — the method name/return-type line, the timing
// setup (orig-505..510), the PrepareMemoryForIO middle arguments, the
// timingSinceLaunch computation (orig-552..553), the switch(errorStatus)
// opening inside the callback (orig-567..569), and the callback's return are
// not visible here. `hints`/`extensionNameToPrefix` appear unused in the
// visible lines.
479 const Request& request,
480 const std::vector<SyncFence>& waitFor,
481 MeasureTiming measureTiming,
482 const OptionalTimePoint& deadline,
483 const OptionalDuration&,
484 const OptionalDuration&,
485 const std::vector<android::nn::TokenValuePair>& hints,
486 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const
488 VLOG(DRIVER) <<
"ArmnnPreparedModel::executeFenced()";
490 if (!m_PrepareFromCache) {
491 const auto modelRequest = validateRequestForModel(request, m_Model);
492 if (!modelRequest.ok())
494 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
496 VLOG(DRIVER) <<
"ArmnnPreparedModel::executeFenced(): " <<
GetModelSummary(m_Model).c_str();
498 if (hasDeadlinePassed(deadline))
500 return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
504 if (measureTiming == MeasureTiming::YES)
// Every wait fence must carry a handle and must signal before we run.
511 for (
const auto& syncFence : waitFor)
513 if (!syncFence.getSharedHandle())
515 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
517 if (syncFence.syncWait({}) != SyncFence::FenceState::SIGNALED)
519 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) <<
"syncWait failed";
// Timing for the post-fence portion starts only after all fences signal.
523 android::nn::TimePoint fenceExecutionStart;
524 if (measureTiming == MeasureTiming::YES)
526 fenceExecutionStart = Clock::now();
531 auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
534 auto inputTensors = std::make_shared<armnn::InputTensors>();
535 auto outputTensors = std::make_shared<armnn::OutputTensors>();
537 auto isPointerTypeMemory = IsPointerTypeMemory(request);
538 ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
542 isPointerTypeMemory);
// NOTE(review): all preparation failures are reported as INVALID_ARGUMENT
// here, masking GENERAL_FAILURE / OUTPUT_INSUFFICIENT_SIZE — compare with
// execute(), which maps each status individually. Confirm intent.
544 if (theErrorStatus != ErrorStatus::NONE)
546 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) <<
"executeFenced() failed";
549 Timing timingSinceLaunch = {};
550 Timing timingAfterFence = {};
551 if (measureTiming == MeasureTiming::YES)
554 timingAfterFence.timeInDriver = ctx.
driverEnd - fenceExecutionStart;
// NOTE(review): the "timingSinceLaunch" log prints timingAfterFence's
// field — possibly mislabelled; confirm against the full source.
555 VLOG(DRIVER) <<
"executeFenced timingSinceLaunch = " << timingAfterFence.timeOnDevice;
556 VLOG(DRIVER) <<
"executeFenced timingAfterFence = " << timingAfterFence.timeInDriver;
559 VLOG(DRIVER) <<
"ArmnnCanonicalPreparedModel::executeFenced(...) before ExecuteGraph";
560 auto errorStatus =
ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
561 VLOG(DRIVER) <<
"ArmnnCanonicalPreparedModel::executeFenced(...) after ExecuteGraph";
// Callback handed back to the caller; captures status and timings by value
// so it stays valid after this frame returns.
563 ExecuteFencedInfoCallback armnnFencedExecutionCallback =
564 [timingSinceLaunch, timingAfterFence, errorStatus]() {
566 GeneralResult<std::pair<Timing, Timing>> result;
// NOTE(review): BUG — no `break` between these cases: every error case
// falls through, overwriting the code and finally reaching the success
// assignment below. [[fallthrough]] markers or breaks are needed.
570 case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
571 result.error().code = (ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
572 case ErrorStatus::GENERAL_FAILURE:
573 result.error().code = (ErrorStatus::GENERAL_FAILURE);
574 case ErrorStatus::INVALID_ARGUMENT:
575 result.error().code = (ErrorStatus::INVALID_ARGUMENT);
578 result.value() = std::make_pair(timingSinceLaunch, timingAfterFence);
// The work already completed synchronously, so hand back a signaled fence.
583 return std::make_pair(SyncFence::createAsSignaled(), std::move(armnnFencedExecutionCallback ));
// Creates a reusable execution object by wrapping this prepared model in the
// canonical DefaultExecution helper (which replays execute() per invocation).
// NOTE(review): fragment — the method name/return-type line and the middle
// constructor arguments (orig-595..596, presumably request and
// measureTiming) are not visible here; `hints`/`extensionNameToPrefix`
// appear unused in the visible lines.
587 const Request& request,
588 MeasureTiming measureTiming,
589 const OptionalDuration& loopTimeoutDuration,
590 const std::vector<android::nn::TokenValuePair>& hints,
591 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const
593 VLOG(DRIVER) <<
"ArmnnPreparedModel::createReusableExecution()";
594 return std::make_shared<DefaultExecution>(shared_from_this(),
597 loopTimeoutDuration);
// Debug helper: when a dump directory was configured at construction, writes
// each tensor binding in the collection to a "<networkId>.dump" request file,
// naming tensors "<prefix><index>" via BuildTensorName. A no-op when
// m_RequestInputsAndOutputsDumpDir is empty.
// NOTE(review): fragment — the actual dump call wrapping orig-622/623
// (presumably a DumpTensor helper also taking the dump dir and requestName)
// is not visible here.
611 template<
typename TensorBindingCollection>
612 void ArmnnPreparedModel::DumpTensorsIfRequired(
char const* tensorNamePrefix,
613 const TensorBindingCollection& tensorBindings)
const
615 if (!m_RequestInputsAndOutputsDumpDir.empty())
617 const std::string requestName = std::to_string(m_NetworkId) +
".dump";
618 for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
622 BuildTensorName(tensorNamePrefix, i),
623 tensorBindings[i].second);
// Destructor fragment: when GPU profiling is enabled, fetches the runtime
// profiler for this network (presumably to print/flush profiling results —
// the call consuming `profiler` and m_RequestInputsAndOutputsDumpDir at
// orig-635..639 is not visible here; confirm against the full source).
630 VLOG(DRIVER) <<
"ArmnnPreparedModel::~ArmnnPreparedModel()";
632 if (m_GpuProfilingEnabled)
634 auto profiler = m_Runtime->
GetProfiler(m_NetworkId);
639 m_RequestInputsAndOutputsDumpDir,
// Fragment of a warm-up/self-test run (signature not visible in this chunk):
// allocates zeroed scratch buffers sized from each input/output tensor info,
// binds them as dummy input/output tensors, and reports success iff the
// subsequent execution (not visible here) returns ErrorStatus::NONE.
// NOTE(review): the loop bounds `numInputs`/`numOutputs`, the tensor-info
// lookups, the construction of `inputTensor` (orig-659..660), and the call
// producing `errorStatus` (orig-676..681) are outside this view — confirm
// against the full source. `storage` owns every buffer for the duration of
// the run; the tensors hold raw pointers into it.
650 std::vector<std::vector<char>> storage;
652 for (
unsigned int i = 0; i < numInputs; i++)
658 storage.emplace_back(inputTensorInfo.
GetNumBytes());
661 inputTensors.emplace_back(i, inputTensor);
665 for (
unsigned int i = 0; i < numOutputs; i++)
668 storage.emplace_back(outputTensorInfo.
GetNumBytes());
669 const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());
671 outputTensors.emplace_back(i, outputTensor);
675 auto memPools = std::make_shared<std::vector<::android::nn::RunTimePoolInfo>>();
682 return errorStatus == ErrorStatus::NONE;