author     Diqing Zhong <diqing.zhong@arm.com>    2021-01-11 12:52:48 +0100
committer  Diqing Zhong <diqing.zhong@arm.com>    2021-01-19 11:29:05 +0100
commit     db5124c2b5e10b34c61b3e016bb597ba1c1574df (patch)
tree       82bbd348c6a0ff9d3d4e6a44067c032922385d0b
parent     9a0cff1cd1334f4d3e7dfb542ad0be4f0e71a9de (diff)
MLBEDSW-3144: Add weights compression ratio
- Also removed the original bits_per_element calculation

Change-Id: I51bfbd28e14f316aae2d542bb610a3ed57b8b53b
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
-rw-r--r--  ethosu/vela/nn_graph.py           |  6
-rw-r--r--  ethosu/vela/npu_serialisation.py  | 10
-rw-r--r--  ethosu/vela/stats_writer.py       | 23
-rw-r--r--  ethosu/vela/tensor_allocation.py  | 14
4 files changed, 22 insertions(+), 31 deletions(-)
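
For orientation before the diff: the new metric is simply compressed weight bytes divided by the original (uncompressed) weight bytes. The following is a minimal sketch, not Vela code; it only assumes tensor objects exposing the storage_size(), elements() and element_size() methods used later in tensor_allocation.py.

def weights_compression_ratio(weight_tensors):
    # Sketch of the metric introduced by this patch: encoded weight bytes
    # divided by raw weight bytes (elements * element size).
    total_compressed = 0
    total_original = 0
    for tens in weight_tensors:
        total_compressed += tens.storage_size()                   # bytes after weight encoding
        total_original += tens.elements() * tens.element_size()   # raw bytes
    try:
        return total_compressed / total_original
    except ZeroDivisionError:
        return 0.0

A ratio below 1.0 means the encoded weights are smaller than the raw weights; 0.0 is reported when there are no weights at all.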
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index d2c848a..71d4e61 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -512,9 +512,9 @@ class Graph:
         self.subgraphs = []
         self.metadata = []
         self.memory_used = {}
-        self.bits_per_element = {}
-        self.total_size = {}
-        self.total_elements = {}
+        self.weights_compression_ratio = 0
+        self.total_original_weights = 0
+        self.total_compressed_weights = 0
         self.weight_cache = None  # See CompressedWeightCache
 
     def get_root_subgraph(self):
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index a11907b..fc6b96b 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -72,16 +72,6 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fas
     command_stream_size_bytes = len(payload_bytes)
 
-    # Adjust the bits per element calculation to exclude metadata generated by Vela
-    nng.total_size[flash_area] = nng.total_size.get(flash_area, 0) - flash_size - command_stream_size_bytes
-    nng.total_elements[flash_area] = nng.total_elements.get(flash_area, 0) - flash_size - command_stream_size_bytes
-    nng.total_size[scratch_area] = nng.total_size.get(scratch_area, 0) - scratch_size
-    nng.total_elements[scratch_area] = nng.total_elements.get(scratch_area, 0) - scratch_size
-
-    if scratch_area != scratch_fast_area:
-        nng.total_size[scratch_fast_area] = nng.total_size.get(scratch_fast_area, 0)
-        nng.total_elements[scratch_fast_area] = nng.total_elements.get(scratch_fast_area, 0)
-
     if flash_tens == scratch_tens is None:
         # First Npu subgraph, create scratch and flash tensors
         sg.scratch_tensor = make_memory_tensor(
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index 70b3ffb..1fb6702 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -58,7 +58,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
             "passes_after_fusing",
         ]
         labels += [area.identifier_name() + "_memory_used" for area in mem_areas]
-        labels += ["on_chip_flash_bits_per_element", "off_chip_flash_bits_per_element"]
+        labels += ["weights_compression_ratio"]
 
         for mem_area in mem_areas:
            labels += [
@@ -107,11 +107,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
         data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes]
         data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in mem_areas]
-
-        data_items += [
-            nng.bits_per_element.get(MemArea.OnChipFlash, 0.0),
-            nng.bits_per_element.get(MemArea.OffChipFlash, 0.0),
-        ]
+        data_items += [nng.weights_compression_ratio]
 
         for mem_area in mem_areas:
             bws = nng.bandwidths[mem_area]
@@ -231,7 +227,7 @@ def print_performance_metrics_for_strat(
     num_cascaded_passes,
     n_operations=0,
     cpu_operations=None,
-    bits_per_element=None,
+    weights_compression_ratio=None,
     show_cpu_operations=False,
     f=sys.stdout,
 ):
@@ -268,11 +264,7 @@ def print_performance_metrics_for_strat(
         aug_label = label + " used"
 
-        extra = ""
-        if (mem_area == MemArea.OnChipFlash or mem_area == MemArea.OffChipFlash) and bits_per_element is not None:
-            extra = f" ({bits_per_element[mem_area]:.2f} bits per element)"
-
-        print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB{extra}", file=f)
+        print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB", file=f)
 
     print(file=f)
     print(f"{num_passes:d} passes fused into {num_cascaded_passes:d}", file=f)
@@ -329,6 +321,11 @@ def print_performance_metrics_for_strat(
         )
     print(file=f)
 
+    if weights_compression_ratio != 0:
+        print(
+            f"Weights Compression Ratio {weights_compression_ratio:12.2f}", file=f,
+        )
+
     print(
         f"Neural network macs {int(macs):12d} MACs/batch", file=f,
     )
@@ -368,7 +365,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout
         n_cascaded_passes,
         n_operations,
         cpu_operations,
-        nng.bits_per_element,
+        nng.weights_compression_ratio,
         show_cpu_operations,
         f,
     )
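
To illustrate the console change above: the bits-per-element annotation on the flash memory lines is gone, and a single summary line is printed instead. A standalone sketch with a hypothetical ratio value:

weights_compression_ratio = 0.47  # hypothetical value, for illustration only
if weights_compression_ratio != 0:
    # Same format string as the new code in print_performance_metrics_for_strat:
    # the ratio is printed right-aligned in a 12-character field, two decimals.
    print(f"Weights Compression Ratio {weights_compression_ratio:12.2f}")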
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 7f66579..1e5eb85 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -202,8 +202,11 @@ def allocate_tensors(
             else:
                 sg.memory_used_per_type[mem_type] += total_sz
 
-        nng.total_size[mem_area] = nng.total_size.get(mem_area, 0) + sum(tens.storage_size() for tens in lrs.ranges)
-        nng.total_elements[mem_area] = nng.total_elements.get(mem_area, 0) + sum(tens.elements() for tens in lrs.ranges)
+        if mem_area == arch.fast_storage_mem_area:
+            for tens in lrs.ranges:
+                if tens.purpose == TensorPurpose.Weights:
+                    nng.total_compressed_weights += tens.storage_size()
+                    nng.total_original_weights += tens.elements() * tens.element_size()
 
         print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation)
@@ -214,9 +217,10 @@ def allocate_tensors(
         if sg == nng.get_root_subgraph():
             nng.memory_used = sg.memory_used
-            for mem_area in nng.total_elements.keys():
+            if mem_area == arch.fast_storage_mem_area:
                 try:
-                    nng.bits_per_element[mem_area] = nng.total_size[mem_area] * 8 / nng.total_elements[mem_area]
+                    nng.weights_compression_ratio = nng.total_compressed_weights / nng.total_original_weights
                 except ZeroDivisionError:
-                    nng.bits_per_element[mem_area] = 0.0
+                    nng.weights_compression_ratio = 0.0
+
     return True
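
For intuition on how the new metric relates to the one it replaces: for 8-bit weights the ratio maps directly to a per-element bit count, although the removed bits_per_element figure was computed over all tensors in a memory area rather than just the weights, so the correspondence is only approximate. A hypothetical example:

original_bytes = 1_000_000    # hypothetical: 1,000,000 int8 weight elements, 1 byte each
compressed_bytes = 620_000    # hypothetical encoded size

weights_compression_ratio = compressed_bytes / original_bytes  # 0.62
bits_per_weight_element = 8 * weights_compression_ratio        # 4.96
print(f"ratio {weights_compression_ratio:.2f}, {bits_per_weight_element:.2f} bits per weight element")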