From db5124c2b5e10b34c61b3e016bb597ba1c1574df Mon Sep 17 00:00:00 2001
From: Diqing Zhong
Date: Mon, 11 Jan 2021 12:52:48 +0100
Subject: MLBEDSW-3144: Add weights compression ratio

- Also removed the original bit_per_element

Change-Id: I51bfbd28e14f316aae2d542bb610a3ed57b8b53b
Signed-off-by: Diqing Zhong
---
 ethosu/vela/nn_graph.py          |  6 +++---
 ethosu/vela/npu_serialisation.py | 10 ----------
 ethosu/vela/stats_writer.py      | 23 ++++++++++-------------
 ethosu/vela/tensor_allocation.py | 14 +++++++++-----
 4 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index d2c848ad..71d4e614 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -512,9 +512,9 @@ class Graph:
         self.subgraphs = []
         self.metadata = []
         self.memory_used = {}
-        self.bits_per_element = {}
-        self.total_size = {}
-        self.total_elements = {}
+        self.weights_compression_ratio = 0
+        self.total_original_weights = 0
+        self.total_compressed_weights = 0
         self.weight_cache = None  # See CompressedWeightCache

     def get_root_subgraph(self):
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index a11907b2..fc6b96b7 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -72,16 +72,6 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fas

     command_stream_size_bytes = len(payload_bytes)

-    # Adjust the bits per element calculation to exclude metadata generated by Vela
-    nng.total_size[flash_area] = nng.total_size.get(flash_area, 0) - flash_size - command_stream_size_bytes
-    nng.total_elements[flash_area] = nng.total_elements.get(flash_area, 0) - flash_size - command_stream_size_bytes
-    nng.total_size[scratch_area] = nng.total_size.get(scratch_area, 0) - scratch_size
-    nng.total_elements[scratch_area] = nng.total_elements.get(scratch_area, 0) - scratch_size
-
-    if scratch_area != scratch_fast_area:
-        nng.total_size[scratch_fast_area] = nng.total_size.get(scratch_fast_area, 0)
-        nng.total_elements[scratch_fast_area] = nng.total_elements.get(scratch_fast_area, 0)
-
     if flash_tens == scratch_tens is None:
         # First Npu subgraph, create scratch and flash tensors
         sg.scratch_tensor = make_memory_tensor(
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index 70b3ffb7..1fb6702b 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -58,7 +58,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
             "passes_after_fusing",
         ]
         labels += [area.identifier_name() + "_memory_used" for area in mem_areas]
-        labels += ["on_chip_flash_bits_per_element", "off_chip_flash_bits_per_element"]
+        labels += ["weights_compression_ratio"]

         for mem_area in mem_areas:
             labels += [
@@ -107,11 +107,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):

         data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes]
         data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in mem_areas]
-
-        data_items += [
-            nng.bits_per_element.get(MemArea.OnChipFlash, 0.0),
-            nng.bits_per_element.get(MemArea.OffChipFlash, 0.0),
-        ]
+        data_items += [nng.weights_compression_ratio]

         for mem_area in mem_areas:
             bws = nng.bandwidths[mem_area]
@@ -231,7 +227,7 @@ def print_performance_metrics_for_strat(
     num_cascaded_passes,
     n_operations=0,
     cpu_operations=None,
-    bits_per_element=None,
+    weights_compression_ratio=None,
     show_cpu_operations=False,
     f=sys.stdout,
 ):
@@ -268,11 +264,7 @@ def print_performance_metrics_for_strat(

         aug_label = label + " used"

-        extra = ""
-        if (mem_area == MemArea.OnChipFlash or mem_area == MemArea.OffChipFlash) and bits_per_element is not None:
-            extra = f" ({bits_per_element[mem_area]:.2f} bits per element)"
-
-        print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB{extra}", file=f)
+        print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB", file=f)

     print(file=f)
     print(f"{num_passes:d} passes fused into {num_cascaded_passes:d}", file=f)
@@ -329,6 +321,11 @@ def print_performance_metrics_for_strat(
     )
     print(file=f)

+    if weights_compression_ratio != 0:
+        print(
+            f"Weights Compression Ratio {weights_compression_ratio:12.2f}", file=f,
+        )
+
     print(
         f"Neural network macs {int(macs):12d} MACs/batch", file=f,
     )
@@ -368,7 +365,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout
         n_cascaded_passes,
         n_operations,
         cpu_operations,
-        nng.bits_per_element,
+        nng.weights_compression_ratio,
         show_cpu_operations,
         f,
     )
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 7f66579e..1e5eb852 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -202,8 +202,11 @@ def allocate_tensors(
             else:
                 sg.memory_used_per_type[mem_type] += total_sz

-        nng.total_size[mem_area] = nng.total_size.get(mem_area, 0) + sum(tens.storage_size() for tens in lrs.ranges)
-        nng.total_elements[mem_area] = nng.total_elements.get(mem_area, 0) + sum(tens.elements() for tens in lrs.ranges)
+        if mem_area == arch.fast_storage_mem_area:
+            for tens in lrs.ranges:
+                if tens.purpose == TensorPurpose.Weights:
+                    nng.total_compressed_weights += tens.storage_size()
+                    nng.total_original_weights += tens.elements() * tens.element_size()

         print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation)
@@ -214,9 +217,10 @@ def allocate_tensors(

     if sg == nng.get_root_subgraph():
         nng.memory_used = sg.memory_used
-        for mem_area in nng.total_elements.keys():
+        if mem_area == arch.fast_storage_mem_area:
             try:
-                nng.bits_per_element[mem_area] = nng.total_size[mem_area] * 8 / nng.total_elements[mem_area]
+                nng.weights_compression_ratio = nng.total_compressed_weights / nng.total_original_weights
             except ZeroDivisionError:
-                nng.weights_compression_ratio = 0.0
+                nng.weights_compression_ratio = 0.0
+
     return True
-- 
cgit v1.2.1