diff options
Diffstat (limited to 'ethosu/vela')
-rw-r--r--  ethosu/vela/api.py               |  5
-rw-r--r--  ethosu/vela/compiler_driver.py   |  2
-rw-r--r--  ethosu/vela/nn_graph.py          |  4
-rw-r--r--  ethosu/vela/stats_writer.py      | 37
-rw-r--r--  ethosu/vela/tensor_allocation.py |  4
-rw-r--r--  ethosu/vela/vela.py              |  9
-rw-r--r--  ethosu/vela/weight_compressor.py | 33
7 files changed, 60 insertions, 34 deletions
diff --git a/ethosu/vela/api.py b/ethosu/vela/api.py index f972133d..e91c0bdb 100644 --- a/ethosu/vela/api.py +++ b/ethosu/vela/api.py @@ -416,15 +416,16 @@ def npu_encode_weights( :param ofm_block_depth: the depth of blocks for processing :param is_depthwise: a boolean indicating these weights are used for a depthwise traversal :param block_traversal: indicates how these weights are traversed on sub-kernel basis - :return: a bytearray of compressed weights + :return: a bytearray of encoded weights """ from .architecture_features import Accelerator from . import weight_compressor acc = Accelerator.from_npu_accelerator(accelerator) - return weight_compressor.encode_weights( + encoded_weights, _ = weight_compressor.encode_weights( acc, weights_volume, dilation_xy, ifm_bitdepth, ofm_block_depth, is_depthwise, block_traversal ) + return encoded_weights def npu_encode_bias(bias: numpy.int64, scale: int, shift: int): diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py index a3c01001..26d350ea 100644 --- a/ethosu/vela/compiler_driver.py +++ b/ethosu/vela/compiler_driver.py @@ -61,6 +61,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions verbose_high_level_command_stream=False, verbose_register_command_stream=False, verbose_operators=False, + verbose_weights=False, show_cpu_operations=False, tensor_allocator=TensorAllocator.Greedy, timing=False, @@ -77,6 +78,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions self.verbose_high_level_command_stream = verbose_high_level_command_stream self.verbose_register_command_stream = verbose_register_command_stream self.verbose_operators = verbose_operators + self.verbose_weights = verbose_weights self.show_cpu_operations = show_cpu_operations self.tensor_allocator = tensor_allocator self.timing = timing diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py index 2d4b0c87..677a385a 100644 --- a/ethosu/vela/nn_graph.py +++ b/ethosu/vela/nn_graph.py @@ -517,9 
+517,9 @@ class Graph: self.subgraphs = [] self.metadata = [] self.memory_used = {} - self.weights_compression_ratio = 0 self.total_original_weights = 0 - self.total_compressed_weights = 0 + self.total_npu_weights = 0 + self.total_npu_encoded_weights = 0 self.weight_cache = None # See CompressedWeightCache def get_root_subgraph(self): diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py index 597fd151..18b8092e 100644 --- a/ethosu/vela/stats_writer.py +++ b/ethosu/vela/stats_writer.py @@ -58,7 +58,9 @@ def write_summary_metrics_csv(nng, summary_filename, arch): "passes_after_fusing", ] labels += [area.identifier_name() + "_memory_used" for area in mem_areas] - labels += ["weights_compression_ratio"] + labels += ["total_original_weights"] + labels += ["total_npu_weights"] + labels += ["total_npu_encoded_weights"] for mem_area in mem_areas: labels += [ @@ -107,7 +109,9 @@ def write_summary_metrics_csv(nng, summary_filename, arch): data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes] data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in mem_areas] - data_items += [nng.weights_compression_ratio] + data_items += [nng.total_original_weights] + data_items += [nng.total_npu_weights] + data_items += [nng.total_npu_encoded_weights] for mem_area in mem_areas: bws = nng.bandwidths[mem_area] @@ -228,8 +232,8 @@ def print_performance_metrics_for_strat( num_cascaded_passes, n_operations=0, cpu_operations=None, - weights_compression_ratio=None, show_cpu_operations=False, + weights_data=None, f=sys.stdout, ): @@ -327,10 +331,11 @@ def print_performance_metrics_for_strat( ) print(file=f) - if weights_compression_ratio: - print( - f"Weights Compression Ratio {weights_compression_ratio:12.2f}", file=f, - ) + if weights_data: + print(f"Original Weights Size {weights_data['original'] / 1024.0:12.2f} KiB", file=f) + print(f"NPU Weights Size {weights_data['npu'] / 1024.0:12.2f} KiB", file=f) + print(f"NPU 
Encoded Weights Size {weights_data['npu_encoded'] / 1024.0:12.2f} KiB", file=f) + print(file=f) print( f"Neural network macs {int(macs):12d} MACs/batch", file=f, @@ -354,12 +359,21 @@ def print_performance_metrics_for_strat( print(file=f) -def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout): +def print_performance_metrics(nng, arch, show_cpu_operations=False, verbose_weights=False, f=sys.stdout): n_passes = sum(len(sg.passes) for sg in nng.subgraphs) n_cascaded_passes = sum(len(sg.cascaded_passes) for sg in nng.subgraphs) n_operations = sum(len(ps.ops) for sg in nng.subgraphs for ps in sg.passes) cpu_operations = sum((ps.ops for sg in nng.subgraphs for ps in sg.passes if ps.placement == PassPlacement.Cpu), []) min_mem_usage = max(sg.min_mem_usage for sg in nng.subgraphs) + weights_data = ( + { + "original": nng.total_original_weights, + "npu": nng.total_npu_weights, + "npu_encoded": nng.total_npu_encoded_weights, + } + if verbose_weights + else None + ) return print_performance_metrics_for_strat( arch, nng.name, @@ -373,12 +387,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout n_cascaded_passes, n_operations, cpu_operations, - nng.weights_compression_ratio, show_cpu_operations, + weights_data, f, ) - - -def write_human_friendly_metrics(nng, arch, filename): - f = open(filename, "w") - print_performance_metrics(nng, arch, f=f) diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py index 0ad30e5f..7ffc6f3d 100644 --- a/ethosu/vela/tensor_allocation.py +++ b/ethosu/vela/tensor_allocation.py @@ -217,9 +217,5 @@ def allocate_tensors( if sg == nng.get_root_subgraph(): nng.memory_used = sg.memory_used - try: - nng.weights_compression_ratio = nng.total_compressed_weights / nng.total_original_weights - except ZeroDivisionError: - nng.weights_compression_ratio = 0.0 return True diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py index c9551861..aa74ecf3 100644 --- 
a/ethosu/vela/vela.py +++ b/ethosu/vela/vela.py @@ -77,7 +77,12 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config) stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch) - stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch) + stats_writer.print_performance_metrics( + nng, + show_cpu_operations=compiler_options.show_cpu_operations, + verbose_weights=compiler_options.verbose_weights, + arch=arch, + ) output_filename = output_basename + "_vela.tflite" if input_name.endswith(".tflite"): @@ -284,6 +289,7 @@ def main(args=None): "--verbose-register-command-stream", action="store_true", help="Verbose register command stream" ) parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list") + parser.add_argument("--verbose-weights", action="store_true", help="Verbose weights information") parser.add_argument( "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU" ) @@ -456,6 +462,7 @@ def main(args=None): verbose_high_level_command_stream=args.verbose_high_level_command_stream, verbose_register_command_stream=args.verbose_register_command_stream, verbose_operators=args.verbose_operators, + verbose_weights=args.verbose_weights, show_cpu_operations=args.show_cpu_operations, tensor_allocator=args.tensor_allocator, timing=args.timing, diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py index bb7cd674..7ce237ca 100644 --- a/ethosu/vela/weight_compressor.py +++ b/ethosu/vela/weight_compressor.py @@ -68,7 +68,7 @@ def encode_weights( :param is_depthwise: a boolean indicating these weights are used for a depthwise traversal :param block_traversal: indicates how these weights are traversed on sub-kernel basis - :return: a bytearray of compressed weights + :return: a tuple with a bytearray of 
encoded weights and the size of the unencoded weights """ # Check arg types assert isinstance(accelerator, Accelerator) @@ -104,7 +104,7 @@ def encode_weights( dilation=dilation_xy, ) encoded_stream = encode(raw_stream) - return encoded_stream + return encoded_stream, len(raw_stream) def encode_bias(bias: np.int64, scale: int, shift: int): @@ -161,15 +161,23 @@ class CompressedWeightCache: def __init__(self): self.cache = {} # maps from WeightCompressionConfig to a tensor clone containing compressed weights + def has_tensor_with_same_compression(self, wcc): + return self.cache.get(wcc) is not None + def get_tensor_with_same_compression(self, wcc): - return self.cache.get(wcc) + cache_obj = self.cache.get(wcc) + return cache_obj[0] if cache_obj else None + + def get_unencoded_size_with_same_compression(self, wcc): + cache_obj = self.cache.get(wcc) + return cache_obj[1] if cache_obj else None - def add(self, tens): + def add(self, tens, unencoded_size): # Adds the compressed weights from the tensor to the cache wcc = tens.weight_compression_config # Clone the tensor to make sure that nothing related to the weight compression is modified tens_clone = tens.clone("_weights{}_{}".format(wcc.ofm_block_depth, wcc.ofm_depth_step)) - self.cache[wcc] = tens_clone + self.cache[wcc] = (tens_clone, unencoded_size) def encode(weight_stream): @@ -300,7 +308,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth # Cache hit, copy weights from the cache tens.copy_compressed_weight_info(tens_cached) set_storage_shape(tens) - return + return nng.weight_cache.get_unencoded_size_with_same_compression(wcc) # No cache hit, perform the compression assert tens.quantization is not None assert tens.quantization.scale_f32 is not None @@ -321,6 +329,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth encoded_streams_substream_offsets = [] offset = 0 max_single_buffer_len = 0 + unencoded_size = 0 ifm_bitdepth = 
tens.consumer_list[0].inputs[0].dtype.size_in_bits() ifm_depth = weights.shape[-2] @@ -371,7 +380,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth block_depth = (ofm_block_depth + arch.ncores - 1 - core) // arch.ncores encoded_substream = [] if block_depth != 0: - encoded_substream = encode_weights( + encoded_substream, raw_stream_size = encode_weights( accelerator=arch.accelerator_config, weights_volume=core_weights, dilation_xy=dilation, @@ -380,6 +389,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth is_depthwise=is_depthwise, block_traversal=block_traversal, ) + unencoded_size += raw_stream_size encoded_stream.extend(encoded_substream) substream_offsets.append(len(encoded_stream)) @@ -408,7 +418,8 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth tens.compressed_values_substream_offsets = encoded_streams_substream_offsets tens.brick_size = brick_size set_storage_shape(tens) - nng.weight_cache.add(tens) + nng.weight_cache.add(tens, unencoded_size) + return unencoded_size def calc_scales_and_pack_biases(tens, arch, ofm_depth_step, rescale_for_faf=False): @@ -525,11 +536,11 @@ def update_pass_weight_and_scale_tensors(nng, arch): ofm_depth_step = ps.block_config[-1] else: ofm_depth_step = tens.shape[-1] - compress_weights( + nng.total_npu_weights += compress_weights( arch, nng, tens, op.type.npu_block_type, ps.block_config[-1], ofm_depth_step, op.get_dilation_h_w() ) - nng.total_compressed_weights += tens.weight_compressed_offsets[-1] - nng.total_original_weights += tens.elements() * tens.element_size() + nng.total_npu_encoded_weights += tens.weight_compressed_offsets[-1] + nng.total_original_weights += int(tens.elements() * tens.element_size()) # Update source tensor if needs_dma: |