1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
import os
from typing import List, Tuple
import pyarmnn as ann
import numpy as np
class ArmnnNetworkExecutor:
    """Loads a network with Arm NN (pyarmnn) and executes inference on it."""

    def __init__(self, model_file: str, backends: list):
        """
        Creates an inference executor for a given network and a list of backends.

        Args:
            model_file: User-specified model file.
            backends: List of backends to optimize network (e.g. ['CpuAcc', 'CpuRef']).
        """
        self.model_file = model_file
        self.backends = backends
        self.network_id, self.runtime, self.input_binding_info, self.output_binding_info = self.create_network()
        # Output tensors are created once from the binding info and reused for every run() call.
        self.output_tensors = ann.make_output_tensors(self.output_binding_info)

    def run(self, input_data_list: list) -> List[np.ndarray]:
        """
        Creates input tensors from input data and executes inference with the loaded network.

        Args:
            input_data_list: List of input frames, one per network input.

        Returns:
            list: Inference results as a list of ndarrays.
        """
        input_tensors = ann.make_input_tensors(self.input_binding_info, input_data_list)
        self.runtime.EnqueueWorkload(self.network_id, input_tensors, self.output_tensors)
        return ann.workload_tensors_to_ndarray(self.output_tensors)

    def create_network(self):
        """
        Creates a network based on the model file and a list of backends.

        Returns:
            net_id: Unique ID of the network to run.
            runtime: Runtime context for executing inference.
            input_binding_info: Contains essential information about the model input.
            output_binding_info: Used to map output tensor and its memory.

        Raises:
            FileNotFoundError: If the model file does not exist on disk.
            ValueError: If the model file extension is not a supported type.
        """
        if not os.path.exists(self.model_file):
            raise FileNotFoundError(f'Model file not found for: {self.model_file}')

        _, ext = os.path.splitext(self.model_file)
        if ext == '.tflite':
            parser = ann.ITfLiteParser()
        else:
            raise ValueError("Supplied model file type is not supported. Supported types are [ tflite ]")

        network = parser.CreateNetworkFromBinaryFile(self.model_file)

        # Specify backends to optimize network
        preferred_backends = [ann.BackendId(backend) for backend in self.backends]

        # Select appropriate device context and optimize the network for that device
        options = ann.CreationOptions()
        runtime = ann.IRuntime(options)
        opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(),
                                             ann.OptimizerOptions())
        print(f'Preferred backends: {self.backends}\n{runtime.GetDeviceSpec()}\n'
              f'Optimization warnings: {messages}')

        # Load the optimized network onto the Runtime device
        net_id, _ = runtime.LoadNetwork(opt_network)

        # Get input and output binding information from the last subgraph of the parsed model.
        graph_id = parser.GetSubgraphCount() - 1
        input_binding_info = [parser.GetNetworkInputBindingInfo(graph_id, input_name)
                              for input_name in parser.GetSubgraphInputTensorNames(graph_id)]
        output_binding_info = [parser.GetNetworkOutputBindingInfo(graph_id, output_name)
                               for output_name in parser.GetSubgraphOutputTensorNames(graph_id)]
        return net_id, runtime, input_binding_info, output_binding_info

    def get_data_type(self):
        """
        Get the input data type of the initiated network.

        Returns:
            numpy data type, or None if the network input type is not one of the
            supported types (Float32, QAsymmU8, QAsymmS8).
        """
        # Hoist the lookup so GetDataType() is called once instead of up to three times.
        data_type = self.input_binding_info[0][1].GetDataType()
        if data_type == ann.DataType_Float32:
            return np.float32
        elif data_type == ann.DataType_QAsymmU8:
            return np.uint8
        elif data_type == ann.DataType_QAsymmS8:
            return np.int8
        else:
            return None

    def get_shape(self) -> Tuple:
        """
        Get the input shape of the initiated network.

        Returns:
            tuple: The Shape of the network input.
        """
        return tuple(self.input_binding_info[0][1].GetShape())

    def get_input_quantization_scale(self, idx):
        """
        Get the input quantization scale of the initiated network.

        Args:
            idx: Index of the network input.

        Returns:
            The quantization scale of the network input.
        """
        return self.input_binding_info[idx][1].GetQuantizationScale()

    def get_input_quantization_offset(self, idx):
        """
        Get the input quantization offset of the initiated network.

        Args:
            idx: Index of the network input.

        Returns:
            The quantization offset of the network input.
        """
        return self.input_binding_info[idx][1].GetQuantizationOffset()

    def is_output_quantized(self, idx):
        """
        Get True/False if output tensor is quantized or not respectively.

        Args:
            idx: Index of the network output.

        Returns:
            True if output is quantized and False otherwise.
        """
        return self.output_binding_info[idx][1].IsQuantized()

    def get_output_quantization_scale(self, idx):
        """
        Get the output quantization scale of the initiated network.

        Args:
            idx: Index of the network output.

        Returns:
            The quantization scale of the network output.
        """
        return self.output_binding_info[idx][1].GetQuantizationScale()

    def get_output_quantization_offset(self, idx):
        """
        Get the output quantization offset of the initiated network.

        Args:
            idx: Index of the network output.

        Returns:
            The quantization offset of the network output.
        """
        return self.output_binding_info[idx][1].GetQuantizationOffset()
|