diff options
Diffstat (limited to 'ethosu/vela')
-rw-r--r-- | ethosu/vela/architecture_features.py | 2
-rw-r--r-- | ethosu/vela/compiler_driver.py | 4
-rw-r--r-- | ethosu/vela/scheduler.py | 11
-rw-r--r-- | ethosu/vela/tensor.py | 13
-rw-r--r-- | ethosu/vela/test/testutil.py | 1
-rw-r--r-- | ethosu/vela/vela.py | 8
6 files changed, 30 insertions, 9 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 5453f2cf..8b968a3e 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -184,6 +184,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
         global_memory_clock_scale,
         max_blockdep,
         softmax_support,
+        weight_estimation_scaling,
     ):
         accelerator_config = accelerator_config.lower()
         self.vela_config = vela_config
@@ -215,6 +216,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
         )
 
         self.max_blockdep = max_blockdep
+        self.weight_estimation_scaling = weight_estimation_scaling
 
         dpu_min_height = accel_config.ofm_ublock.height
         dpu_min_width = accel_config.ofm_ublock.width
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 5e9e38fb..94900ad5 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -212,7 +212,9 @@ def compiler_driver(nng, arch, options, scheduler_options):
     if root_sg is not None and (arch.feature_map_storage_mem_area != arch.fast_storage_mem_area):
         if root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) > arch.sram_size:
             raise VelaError(
-                "Sram limit {} bytes, has been exceeded by the scratch fast tensor {} bytes".format(
+                "Sram limit {} bytes, has been exceeded by the scratch fast tensor {} bytes. "
+                "Increasing the value of --weight-estimation-scaling may help to resolve the issue. "
+                "See OPTIONS.md for more information.".format(
                     arch.sram_size, root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)
                 )
             )
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index f3b3a79c..9a8215d5 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -608,7 +608,10 @@ class DynamicProgrammingScheduler:
         base_sram_used = 0
         for tens in ps.intermediates:
             if tens.mem_area == self.mem_area:
-                base_sram_used += tens.storage_size()
+                if tens.purpose == TensorPurpose.Weights:
+                    base_sram_used = tens.storage_size(self.arch.weight_estimation_scaling)
+                else:
+                    base_sram_used += tens.storage_size()
 
         all_block_configs = self.get_block_configs(ps)
         for block_config in all_block_configs:
@@ -718,7 +721,7 @@ class DynamicProgrammingScheduler:
                     )
                 ]
                 sram_used += ifm_tensor.storage_size_for_sub_purpose(
-                    TensorSubPurpose.RollingBufferY, rolling_buffer_y, None
+                    self.arch, TensorSubPurpose.RollingBufferY, rolling_buffer_y, None
                 )
                 all_candidates.extend(self.append_sram_rewrite_list(sram_used, rewrite_list, [strat_opt]))
@@ -779,7 +782,9 @@ class DynamicProgrammingScheduler:
         for tens in ps.intermediates:
             if tens.mem_area == self.mem_area:
                 if tens.purpose == TensorPurpose.Weights:
-                    sram_used += tens.storage_size_for_sub_purpose(TensorSubPurpose.DoubleBuffer, block_config[3])
+                    sram_used += tens.storage_size_for_sub_purpose(
+                        self.arch, TensorSubPurpose.DoubleBuffer, block_config[3]
+                    )
                     rewrite_list.append(
                         (
                             SchedulerRewrite.ChangeTensorSubPurpose,
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index d4f6a409..3ad9b253 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -439,20 +439,25 @@ class Tensor:
     def has_fully_defined_shape(self):
         return shape_fully_defined(self.shape)
 
-    def storage_size(self):
-        raw_size = self.storage_elements() * self.element_size()
+    def storage_size(self, scale=1.0):
+        raw_size = self.storage_elements() * self.element_size() * scale
         if raw_size == 0:
             raw_size = 1  # force it to take up space
 
         rounded_size = numeric_util.round_up(numeric_util.round_up_to_int(raw_size), self.alignment)
         return rounded_size
 
-    def storage_size_for_sub_purpose(self, sub_purpose, param_a=None, param_b=None):
+    def storage_size_for_sub_purpose(self, arch, sub_purpose, param_a=None, param_b=None):
         alt_shape = self.storage_shape_for_sub_purpose(sub_purpose, param_a, param_b)
         elems = shape_num_elements(alt_shape)
         if elems is None:
             return 0
         if sub_purpose == TensorSubPurpose.DoubleBuffer:
-            raw_size = elems * self.element_size() * self.compression_scale_for_worst_weight_stream
+            raw_size = (
+                elems
+                * self.element_size()
+                * self.compression_scale_for_worst_weight_stream
+                * arch.weight_estimation_scaling
+            )
         else:
             # Rolling buffers are used for intermediate data in ifm streaming
             # These will all use the NHCWB16 format, and need to be aligned to 16 in the C-dimension
diff --git a/ethosu/vela/test/testutil.py b/ethosu/vela/test/testutil.py
index 116afa40..68866fc7 100644
--- a/ethosu/vela/test/testutil.py
+++ b/ethosu/vela/test/testutil.py
@@ -38,6 +38,7 @@ def create_arch():
         global_memory_clock_scale=1.0,
         max_blockdep=0,
         softmax_support=True,
+        weight_estimation_scaling=1.0,
     )
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 97cc8736..19080926 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -260,7 +260,12 @@ def main(args=None):
         choices=[True, False],
         help="Control if Softmax should be transformed into a set of npu operations (default: %(default)s)",
     )
-
+    parser.add_argument(
+        "--weight-estimation-scaling",
+        type=float,
+        default=1.0,
+        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
+    )
     args = parser.parse_args(args=args)
 
     # Read configuration file
@@ -291,6 +296,7 @@ def main(args=None):
         global_memory_clock_scale=args.global_memory_clock_scale,
         max_blockdep=args.max_block_dependency,
         softmax_support=args.softmax_support,
+        weight_estimation_scaling=args.weight_estimation_scaling,
    )
 
    compiler_options = compiler_driver.CompilerOptions(