diff options
author | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2021-04-23 14:36:42 +0200 |
---|---|---|
committer | patrik.gustavsson <patrik.gustavsson@arm.com> | 2021-05-06 12:52:21 +0000 |
commit | f5c07c45b48717fb6391adb35cb73ca7cd5734c3 (patch) | |
tree | 199fc4646a403af01009c8e10aafec0bad1e910e /ethosu/vela/weight_compressor.py | |
parent | a0b8d5f1bc32092cb85df07fb68e5582d01def32 (diff) | |
download | ethos-u-vela-f5c07c45b48717fb6391adb35cb73ca7cd5734c3.tar.gz |
[MLBEDSW-4254] Improve weight information in summary
Improved weight information showed in summary if --verbose-weights
option is used.
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Change-Id: Iac142f2a813bf1c05aa9da3f8a384466e2914d06
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
-rw-r--r-- | ethosu/vela/weight_compressor.py | 33 |
1 files changed, 22 insertions, 11 deletions
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py index bb7cd674..7ce237ca 100644 --- a/ethosu/vela/weight_compressor.py +++ b/ethosu/vela/weight_compressor.py @@ -68,7 +68,7 @@ def encode_weights( :param is_depthwise: a boolean indicating these weights are used for a depthwise traversal :param block_traversal: indicates how these weights are traversed on sub-kernel basis - :return: a bytearray of compressed weights + :return: a tuple with a bytearray of encoded weights and the size of the unencoded weights """ # Check arg types assert isinstance(accelerator, Accelerator) @@ -104,7 +104,7 @@ def encode_weights( dilation=dilation_xy, ) encoded_stream = encode(raw_stream) - return encoded_stream + return encoded_stream, len(raw_stream) def encode_bias(bias: np.int64, scale: int, shift: int): @@ -161,15 +161,23 @@ class CompressedWeightCache: def __init__(self): self.cache = {} # maps from WeightCompressionConfig to a tensor clone containing compressed weights + def has_tensor_with_same_compression(self, wcc): + return self.cache.get(wcc) is not None + def get_tensor_with_same_compression(self, wcc): - return self.cache.get(wcc) + cache_obj = self.cache.get(wcc) + return cache_obj[0] if cache_obj else None + + def get_unencoded_size_with_same_compression(self, wcc): + cache_obj = self.cache.get(wcc) + return cache_obj[1] if cache_obj else None - def add(self, tens): + def add(self, tens, unencoded_size): # Adds the compressed weights from the tensor to the cache wcc = tens.weight_compression_config # Clone the tensor to make sure that nothing related to the weight compression is modified tens_clone = tens.clone("_weights{}_{}".format(wcc.ofm_block_depth, wcc.ofm_depth_step)) - self.cache[wcc] = tens_clone + self.cache[wcc] = (tens_clone, unencoded_size) def encode(weight_stream): @@ -300,7 +308,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth # Cache hit, copy weights from the cache tens.copy_compressed_weight_info(tens_cached) set_storage_shape(tens) - return + return nng.weight_cache.get_unencoded_size_with_same_compression(wcc) # No cache hit, perform the compression assert tens.quantization is not None assert tens.quantization.scale_f32 is not None @@ -321,6 +329,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth encoded_streams_substream_offsets = [] offset = 0 max_single_buffer_len = 0 + unencoded_size = 0 ifm_bitdepth = tens.consumer_list[0].inputs[0].dtype.size_in_bits() ifm_depth = weights.shape[-2] @@ -371,7 +380,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth block_depth = (ofm_block_depth + arch.ncores - 1 - core) // arch.ncores encoded_substream = [] if block_depth != 0: - encoded_substream = encode_weights( + encoded_substream, raw_stream_size = encode_weights( accelerator=arch.accelerator_config, weights_volume=core_weights, dilation_xy=dilation, @@ -380,6 +389,7 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth is_depthwise=is_depthwise, block_traversal=block_traversal, ) + unencoded_size += raw_stream_size encoded_stream.extend(encoded_substream) substream_offsets.append(len(encoded_stream)) @@ -408,7 +418,8 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth tens.compressed_values_substream_offsets = encoded_streams_substream_offsets tens.brick_size = brick_size set_storage_shape(tens) - nng.weight_cache.add(tens) + nng.weight_cache.add(tens, unencoded_size) + return unencoded_size def calc_scales_and_pack_biases(tens, arch, ofm_depth_step, rescale_for_faf=False): @@ -525,11 +536,11 @@ def update_pass_weight_and_scale_tensors(nng, arch): ofm_depth_step = ps.block_config[-1] else: ofm_depth_step = tens.shape[-1] - compress_weights( + nng.total_npu_weights += compress_weights( arch, nng, tens, op.type.npu_block_type, ps.block_config[-1], ofm_depth_step, op.get_dilation_h_w() ) - nng.total_compressed_weights += tens.weight_compressed_offsets[-1] - nng.total_original_weights += tens.elements() * tens.element_size() + nng.total_npu_encoded_weights += tens.weight_compressed_offsets[-1] + nng.total_original_weights += int(tens.elements() * tens.element_size()) # Update source tensor if needs_dma: |