diff options
author | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-09-11 10:04:15 +0200 |
---|---|---|
committer | patrik.gustavsson <patrik.gustavsson@arm.com> | 2020-09-17 08:18:50 +0000 |
commit | 1a66697b80a527af6d6dd1ed235199264696767e (patch) | |
tree | 447f19903eedb0ed163348769da28267ccf3bf47 /ethosu/vela/lut.py | |
parent | 1356c2ab034738bcf51822de18911cc499fa2e8e (diff) | |
download | ethos-u-vela-1a66697b80a527af6d6dd1ed235199264696767e.tar.gz |
MLBEDSW-2809: Redo the Tensor addressing
Added a static class TensorAddressMap that stores all Tensor addresses
based on their equivalence_id. Made the "address" field into a property
whose getter and setter look up/set the tensor's address in
TensorAddressMap.
This makes the references to cpu_tensor/npu_tensor obsolete and they
have been removed.
Addition to scheduler: avoid SRAM spilling if an op has consumers in
other subgraphs.
Minor rework in LUTState; it will now assign a unique equivalence_id to
the SHRAM LUT tensor to avoid issues with addressing. The equivalence
checks in LUTState now compare the values of the LUT instead of the
equivalence_id.
Updated LUT unit tests accordingly.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I41de5a8a4e5f07b77d6544d8d4034b754993e503
Diffstat (limited to 'ethosu/vela/lut.py')
-rw-r--r-- | ethosu/vela/lut.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py index 0e8dcc95..e3373ca2 100644 --- a/ethosu/vela/lut.py +++ b/ethosu/vela/lut.py @@ -42,9 +42,9 @@ class LUTState: self.tensors = [] def get_equivalent(self, lut_tens): - # Returns existing lut with same equivalence id, None if not found + # Returns existing lut with the same values, None if not found for t in self.tensors: - if t.equivalent(lut_tens): + if np.array_equal(t.values, lut_tens.values): return t return None @@ -60,6 +60,7 @@ class LUTState: end2 = start2 + tens.storage_size() if not numeric_util.overlaps(start, end, start2, end2): new_state.tensors.append(tens) + return new_state def find_best_address(self, start, stop, step): @@ -129,6 +130,7 @@ def optimize_high_level_cmd_stream(sg, arch): # Place the LUT in the last 2 blocks of SHRAM # Alignment is always on the size of the LUT, 256 for 256-byte LUT, 1K for 1K LUT, etc address = lut_state.find_best_address(lut_start, lut_end, lut_tens.storage_size()) + lut_tens.equivalence_id = uuid.uuid4() lut_tens.address = address cmd.ps.primary_op.attrs["lut_index"] = (address - lut_start) // slot_size lut_state = lut_state.put(lut_tens) |