1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "ArmnnDriver.hpp"
#include "ArmnnDriverImpl.hpp"
#include "ModelToINetworkTransformer.hpp"
#include <armnn/ArmNN.hpp>
#include <BufferTracker.h>
#include <CpuExecutor.h>
#include <nnapi/IExecution.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Result.h>
#include <nnapi/Types.h>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>
#include <string>
namespace armnn_driver
{
/// Per-request timing bookkeeping passed through an execution.
/// When the caller requests MeasureTiming::YES, the four time points are
/// expected to bracket the driver-side work (driverStart/driverEnd) and the
/// device-side work (deviceStart/deviceEnd) respectively.
/// NOTE(review): the time points are default-constructed here and presumably
/// filled in by ExecuteGraph — confirm against the implementation file.
struct CanonicalExecutionContext
{
// Whether the caller asked for this execution to be timed; defaults to NO.
::android::nn::MeasureTiming measureTimings =
::android::nn::MeasureTiming::NO;
// Driver-side start/end timestamps for the whole request.
android::nn::TimePoint driverStart;
android::nn::TimePoint driverEnd;
// Device-side (Arm NN runtime) start/end timestamps.
android::nn::TimePoint deviceStart;
android::nn::TimePoint deviceEnd;
};
/// NNAPI canonical IPreparedModel implementation backed by an Arm NN network
/// that has already been loaded into the runtime and is identified by a
/// NetworkId. Declarations only — definitions live in the corresponding .cpp.
class ArmnnPreparedModel final : public IPreparedModel,
public std::enable_shared_from_this<ArmnnPreparedModel>
{
public:
/// Construct from a network already loaded into \p runtime.
/// \param networkId                       handle of the loaded Arm NN network
/// \param runtime                         non-owning pointer to the Arm NN runtime
/// \param model                           the NNAPI model this network was built from (copied into m_Model)
/// \param requestInputsAndOutputsDumpDir  directory for optional tensor dumps (empty disables dumping — TODO confirm)
/// \param gpuProfilingEnabled             enables GPU profiling output
/// \param priority                        scheduling priority reported via GetModelPriority()
ArmnnPreparedModel(armnn::NetworkId networkId,
armnn::IRuntime* runtime,
const Model& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
Priority priority = Priority::MEDIUM);
/// Overload without a Model, used when the network is restored from a cache
/// (see \p prepareModelFromCache) rather than converted from an NNAPI model.
ArmnnPreparedModel(armnn::NetworkId networkId,
armnn::IRuntime* runtime,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
Priority priority = Priority::MEDIUM,
const bool prepareModelFromCache = false);
virtual ~ArmnnPreparedModel();
/// Synchronously execute \p request; returns the actual output shapes and
/// timing information (timing populated only when \p measureTiming is YES —
/// TODO confirm against implementation).
ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> execute(
const Request& request,
MeasureTiming measureTiming,
const OptionalTimePoint& deadline,
const OptionalDuration& loopTimeoutDuration,
const std::vector<android::nn::TokenValuePair>& hints,
const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const override;
/// Fenced execution: waits on \p waitFor sync fences before running and
/// returns a sync fence plus a callback for retrieving fenced-execution info.
GeneralResult<std::pair<SyncFence, ExecuteFencedInfoCallback>> executeFenced(
const Request& request,
const std::vector<SyncFence>& waitFor,
MeasureTiming measureTiming,
const OptionalTimePoint& deadline,
const OptionalDuration& loopTimeoutDuration,
const OptionalDuration& timeoutDurationAfterFence,
const std::vector<android::nn::TokenValuePair>& hints,
const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const override;
/// Create an execution object that can be re-run with the same request
/// without re-validating/re-preparing inputs each time.
GeneralResult<android::nn::SharedExecution> createReusableExecution(
const Request& request,
MeasureTiming measureTiming,
const OptionalDuration& loopTimeoutDuration,
const std::vector<android::nn::TokenValuePair>& hints,
const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const override;
/// Create a burst object for low-latency repeated executions.
GeneralResult<SharedBurst> configureExecutionBurst() const override;
/// Expose the underlying resource (type-erased) to callers that know what
/// to expect — presumably the Arm NN network/runtime handle; verify in .cpp.
std::any getUnderlyingResource() const override;
/// execute the graph prepared from the request
/// \param pMemPools      shared memory pools backing the request's tensors
/// \param inputTensors   Arm NN input bindings (prepared by PrepareMemoryForIO)
/// \param outputTensors  Arm NN output bindings
/// \param callback       timing context, passed by value
ErrorStatus ExecuteGraph(
std::shared_ptr<std::vector<android::nn::RunTimePoolInfo>>& pMemPools,
armnn::InputTensors& inputTensors,
armnn::OutputTensors& outputTensors,
CanonicalExecutionContext callback) const;
/// Returns the priority this model was prepared with.
Priority GetModelPriority() const;
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs) const;
private:
/// Shared construction-time initialisation used by both constructors
/// (presumably — confirm against the implementation file).
void Init();
/// Bind each request input to an Arm NN input tensor backed by \p memPools.
ErrorStatus PrepareMemoryForInputs(
armnn::InputTensors& inputs,
const Request& request,
const std::vector<android::nn::RunTimePoolInfo>& memPools) const;
/// Bind each request output to an Arm NN output tensor and record its shape.
ErrorStatus PrepareMemoryForOutputs(
armnn::OutputTensors& outputs,
std::vector<OutputShape> &outputShapes,
const Request& request,
const std::vector<android::nn::RunTimePoolInfo>& memPools) const;
/// Map the request's memory pools and prepare both inputs and outputs.
ErrorStatus PrepareMemoryForIO(armnn::InputTensors& inputs,
armnn::OutputTensors& outputs,
std::vector<android::nn::RunTimePoolInfo>& memPools,
const Request& request) const;
/// Dump the given tensor bindings to the dump directory, if one was
/// configured; \p tensorNamePrefix distinguishes inputs from outputs.
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings) const;
/// schedule the graph prepared from the request for execution
// Handle of the network loaded into m_Runtime.
armnn::NetworkId m_NetworkId;
// Non-owning; the runtime must outlive this prepared model.
armnn::IRuntime* m_Runtime;
const Model m_Model;
// NOTE(review): reference member — the referenced string must outlive this
// object, otherwise the reference dangles. Consider storing by value.
const std::string& m_RequestInputsAndOutputsDumpDir;
const bool m_GpuProfilingEnabled;
// Priority supplied at construction, reported via GetModelPriority().
Priority m_ModelPriority;
// True when the model was prepared from a compilation cache (second ctor).
const bool m_PrepareFromCache;
};
}
|