aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/lut.py
diff options
context:
space:
mode:
authorJacob Bohlin <jacob.bohlin@arm.com>2020-09-11 10:04:15 +0200
committerpatrik.gustavsson <patrik.gustavsson@arm.com>2020-09-17 08:18:50 +0000
commit1a66697b80a527af6d6dd1ed235199264696767e (patch)
tree447f19903eedb0ed163348769da28267ccf3bf47 /ethosu/vela/lut.py
parent1356c2ab034738bcf51822de18911cc499fa2e8e (diff)
downloadethos-u-vela-1a66697b80a527af6d6dd1ed235199264696767e.tar.gz
MLBEDSW-2809: Redo the Tensor addressing
Added a static class TensorAddressMap that stores all Tensor addresses based on their equivalence_id. Made the "address" field into a property whose getter and setter look up/set the tensor's address in TensorAddressMap. This makes the references to cpu_tensor/npu_tensor obsolete and they have been removed. Addition to scheduler: avoid SRAM spilling if an op has consumers in other subgraphs. Minor rework in LUTState; it will now assign a unique equivalence_id to the SHRAM lut tensor to avoid issues with addressing. The equivalence checks in LUTState now compare the values of the LUT instead of the equivalence_id. Updated LUT unit tests accordingly. Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com> Change-Id: I41de5a8a4e5f07b77d6544d8d4034b754993e503
Diffstat (limited to 'ethosu/vela/lut.py')
-rw-r--r--ethosu/vela/lut.py6
1 files changed, 4 insertions, 2 deletions
diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py
index 0e8dcc95..e3373ca2 100644
--- a/ethosu/vela/lut.py
+++ b/ethosu/vela/lut.py
@@ -42,9 +42,9 @@ class LUTState:
self.tensors = []
def get_equivalent(self, lut_tens):
- # Returns existing lut with same equivalence id, None if not found
+ # Returns existing lut with the same values, None if not found
for t in self.tensors:
- if t.equivalent(lut_tens):
+ if np.array_equal(t.values, lut_tens.values):
return t
return None
@@ -60,6 +60,7 @@ class LUTState:
end2 = start2 + tens.storage_size()
if not numeric_util.overlaps(start, end, start2, end2):
new_state.tensors.append(tens)
+
return new_state
def find_best_address(self, start, stop, step):
@@ -129,6 +130,7 @@ def optimize_high_level_cmd_stream(sg, arch):
# Place the LUT in the last 2 blocks of SHRAM
# Alignment is always on the size of the LUT, 256 for 256-byte LUT, 1K for 1K LUT, etc
address = lut_state.find_best_address(lut_start, lut_end, lut_tens.storage_size())
+ lut_tens.equivalence_id = uuid.uuid4()
lut_tens.address = address
cmd.ps.primary_op.attrs["lut_index"] = (address - lut_start) // slot_size
lut_state = lut_state.put(lut_tens)