diff options
author | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-08-28 13:25:14 +0200 |
---|---|---|
committer | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-09-03 10:50:10 +0200 |
commit | 0628a8c0136eebf3af8db7fd40b7aed94ff5d670 (patch) | |
tree | 65e7a49264f2df1f969122d8d7fb812773c0d4db /ethosu/vela/live_range.py | |
parent | d2e3355813a33ccefaf112750f86c4f04d6ea12c (diff) | |
download | ethos-u-vela-0628a8c0136eebf3af8db7fd40b7aed94ff5d670.tar.gz |
MLBEDSW-2567: CLI option to specify allocation alignment
Added the CLI option; it only applies to CPU tensors. Added an
AllocationError, which is raised when allocation fails.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I89164dea3ac7b7add7bc40aec2ce8fe50600105d
Diffstat (limited to 'ethosu/vela/live_range.py')
-rw-r--r-- | ethosu/vela/live_range.py | 29 |
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py index fe00b622..156090f7 100644 --- a/ethosu/vela/live_range.py +++ b/ethosu/vela/live_range.py @@ -23,12 +23,13 @@ from .tensor import Tensor class LiveRange: - def __init__(self, tens): + def __init__(self, tens, alignment): self.tensors = [] # Tensors that are assigned to the same LiveRange will be allocated to the same address self.start_time = 99999999999 self.end_time = -1 self.size = 0 self.name = "" + self.alignment = alignment if tens: self.add_tensor(tens) @@ -100,15 +101,10 @@ class LiveRange: return addr def get_alignment(self): - # Get max alignment of LiveRange's tensors - if self.tensors: - alignment = 0 - for tens in self.tensors: - alignment = max(alignment, tens.alignment) + return self.alignment - return alignment - - return Tensor.AllocationQuantum + def set_alignment(self, alignment): + self.alignment = max(self.alignment, alignment) def merge_memory_op_ranges(sg, lr_graph, tensor_should_be_ignored, target_mem_area): @@ -135,14 +131,15 @@ class LiveRangeGraph: self.processed_subgraphs = set() self.current_time = 0 - def get_or_create_range(self, tens): + def get_or_create_range(self, tens, alignment=Tensor.AllocationQuantum): for rng in self.ranges.values(): # Return the live range of the tensor (or it's cpu/npu clone) if any(tensor in rng.tensors for tensor in [tens, tens.npu_tensor, tens.cpu_tensor]): + rng.set_alignment(alignment) return rng # No live range found for the tensor, create a new one - rng = LiveRange(tens) + rng = LiveRange(tens, alignment) self.ranges[tens] = rng return rng @@ -225,6 +222,7 @@ def extract_live_ranges_from_cascaded_passes( use_ifm_ofm_overlap=True, ignore_subgraph_input_output_tensors=False, lr_graph=None, + allocation_alignment=Tensor.AllocationQuantum, ): if lr_graph is None: lr_graph = LiveRangeGraph() @@ -277,7 +275,7 @@ def extract_live_ranges_from_cascaded_passes( for tens in cps.inputs: if tensor_should_be_ignored(tens, target_mem_area, 
target_mem_type_set): continue - rng = lr_graph.get_or_create_range(tens) + rng = lr_graph.get_or_create_range(tens, allocation_alignment) rng.mark_usage(time_for_pass) cps_primary_op = cps.passes[0].primary_op @@ -285,6 +283,7 @@ def extract_live_ranges_from_cascaded_passes( if cps_primary_op and cps_primary_op.type == "NpuOp" and MemType.Permanent_CPU not in target_mem_type_set: # If the primary-op is an NpuOp that means this is where an Npu subgraph # is called. Go into said subgraph and extract live ranges before continuing. + # Use default allocation alignment of 16 for Npu tensors npu_sg = cps_primary_op.attrs["subgraph"] lr_graph = extract_live_ranges_from_cascaded_passes( npu_sg, @@ -302,13 +301,13 @@ def extract_live_ranges_from_cascaded_passes( for tens in cps.intermediates: if tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set): continue - rng = lr_graph.get_or_create_range(tens) + rng = lr_graph.get_or_create_range(tens, allocation_alignment) rng.mark_usage(time_for_pass) for tens in cps.outputs: if tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set): continue - rng = lr_graph.get_or_create_range(tens) + rng = lr_graph.get_or_create_range(tens, allocation_alignment) output_time = time_for_pass if not mark_output_tensors_overlapping_with_input_tensors and is_element_wise: output_time += 1 @@ -338,7 +337,7 @@ def extract_live_ranges_from_cascaded_passes( for tens in sg.output_tensors: if tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set): continue - rng = lr_graph.get_or_create_range(tens) + rng = lr_graph.get_or_create_range(tens, allocation_alignment) rng.mark_usage(end_time) # Add subgraph to set of processed subgraphs |