diff options
Diffstat (limited to 'ethosu/vela')
-rw-r--r-- | ethosu/vela/weight_compressor.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index df7ff751..93000408 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -24,6 +24,7 @@ from .data_type import DataType
 from .errors import UnsupportedFeatureError
 from .nn_graph import SchedulingStrategy
 from .numeric_util import round_up
+from .numeric_util import round_up_divide
 from .operation import NpuBlockType
 from .scaling import quantise_scale
 from .scaling import reduced_quantise_scale
@@ -402,6 +403,7 @@ def calc_scales_and_pack_biases(tens, arch, ofm_depth_step, rescale_for_faf=Fals
     tens.compressed_values_substream_offsets = []
 
     total_elements = len(quantised_scales)
+    alignment_bytes = 0
     for i in range(0, total_elements, ofm_depth_step):
         # Extract streams from brick to generate substreams for each core
         stream = bytearray()
@@ -417,6 +419,7 @@ def calc_scales_and_pack_biases(tens, arch, ofm_depth_step, rescale_for_faf=Fals
             remainder = (len(stream)) % 16
             if remainder > 0:
                 stream.extend(bytearray(16 - remainder))
+                alignment_bytes += 16 - remainder
 
             substream_offsets.append(len(stream))
 
@@ -424,8 +427,7 @@ def calc_scales_and_pack_biases(tens, arch, ofm_depth_step, rescale_for_faf=Fals
         tens.compressed_values.append(stream)
         tens.compressed_values_substream_offsets.append(substream_offsets)
 
-    tens.storage_shape = [total_elements]
-
+    tens.storage_shape = [total_elements + round_up_divide(alignment_bytes, tens.element_size_bytes)]
 
 
 def update_pass_weight_and_scale_tensors(nng, arch):