diff options
author | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-09-11 10:04:15 +0200 |
---|---|---|
committer | patrik.gustavsson <patrik.gustavsson@arm.com> | 2020-09-17 08:18:50 +0000 |
commit | 1a66697b80a527af6d6dd1ed235199264696767e (patch) | |
tree | 447f19903eedb0ed163348769da28267ccf3bf47 /ethosu/vela/live_range.py | |
parent | 1356c2ab034738bcf51822de18911cc499fa2e8e (diff) | |
download | ethos-u-vela-1a66697b80a527af6d6dd1ed235199264696767e.tar.gz |
MLBEDSW-2809: Redo the Tensor addressing
Added a static class TensorAddressMap that stores all Tensor addresses
based on their equivalence_id. Made the "address" field into a property
whose getter and setter look up/set the tensor's address in
TensorAddressMap.
This makes the references to cpu_tensor/npu_tensor obsolete and they
have been removed.
Addition to scheduler: avoid SRAM spilling if an op has consumers in
other subgraphs.
Minor rework in LUTState; it will now assign a unique equivalence_id to
the SHRAM lut tensor to avoid issues with addressing. The equivalence
checks in LUTState now compare the values of the LUT instead of the
equivalence_id.
Updated LUT unit tests accordingly.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I41de5a8a4e5f07b77d6544d8d4034b754993e503
Diffstat (limited to 'ethosu/vela/live_range.py')
-rw-r--r-- | ethosu/vela/live_range.py | 32 |
1 file changed, 7 insertions, 25 deletions
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py index 156090f7..9a8ee580 100644 --- a/ethosu/vela/live_range.py +++ b/ethosu/vela/live_range.py @@ -84,21 +84,11 @@ class LiveRange: return self.name < other.name def set_address(self, address): - # Set address of all unaddressed tensors in LiveRange + # Set address of all tensors in LiveRange for tens in self.tensors: - if tens.address is None: - addr = address - else: - # Limit to single tensor for the lr if the tensor address already assigned - assert len(self.tensors) == 1 - addr = tens.address - tens.address = addr - # Also need to set the address to the tensor's cpu/npu clones - if tens.cpu_tensor is not None: - tens.cpu_tensor.address = addr - if tens.npu_tensor is not None: - tens.npu_tensor.address = addr - return addr + tens.address = address + + return address def get_alignment(self): return self.alignment @@ -113,10 +103,6 @@ def merge_memory_op_ranges(sg, lr_graph, tensor_should_be_ignored, target_mem_ar # For memory only passes, e.g. Reshape. Add input and output tensor to the same LiveRange input_tensor = ps.inputs[0] output_tensor = ps.outputs[0] - # If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input - # or output, fuse the live-range with the Cpu tensors' live-range instead. 
- input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor - output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor if not tensor_should_be_ignored(input_tensor, target_mem_area) and not tensor_should_be_ignored( output_tensor, target_mem_area ): @@ -132,9 +118,9 @@ class LiveRangeGraph: self.current_time = 0 def get_or_create_range(self, tens, alignment=Tensor.AllocationQuantum): - for rng in self.ranges.values(): - # Return the live range of the tensor (or it's cpu/npu clone) - if any(tensor in rng.tensors for tensor in [tens, tens.npu_tensor, tens.cpu_tensor]): + # Return the live range of the tensor (or any of its clones) + for existing_tensor, rng in self.ranges.items(): + if tens.equivalent(existing_tensor): rng.set_alignment(alignment) return rng @@ -252,10 +238,6 @@ def extract_live_ranges_from_cascaded_passes( # For memory only passes, e.g. Reshape. Add input and output tensor to the same LiveRange input_tensor = ps.inputs[0] output_tensor = ps.outputs[0] - # If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input - # or output, fuse the live-range with the Cpu tensors' live-range instead. - input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor - output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor if not tensor_should_be_ignored(input_tensor, target_mem_area, target_mem_type_set) and not ( tensor_should_be_ignored(output_tensor, target_mem_area, target_mem_type_set) ): |