Diffstat (limited to 'ethosu/vela/greedy_allocation.py')
 ethosu/vela/greedy_allocation.py | 43 +++++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)
diff --git a/ethosu/vela/greedy_allocation.py b/ethosu/vela/greedy_allocation.py
index 1cbfce3f..661644a9 100644
--- a/ethosu/vela/greedy_allocation.py
+++ b/ethosu/vela/greedy_allocation.py
@@ -16,6 +16,7 @@
# Description:
# Allocate tensor addresses using a greedy algorithm.
from . import numeric_util
+from .errors import AllocationError
class GreedyAllocator:
@@ -37,24 +38,25 @@ class GreedyAllocator:
best_offset = numeric_util.round_up(current_top, new_lr.get_alignment())
best_offset_fit = (1 << 64) - 1
+ aligned_size = numeric_util.round_up(size, new_lr.get_alignment())
current_offset = 0
for start_addr, lr in self.current_allocs:
aligned_current_offset = numeric_util.round_up(current_offset, new_lr.get_alignment())
- if aligned_current_offset + size <= start_addr and start_addr - current_offset < best_offset_fit:
+ if aligned_current_offset + aligned_size <= start_addr and start_addr - current_offset < best_offset_fit:
best_offset = current_offset
best_offset_fit = start_addr - current_offset
current_offset = start_addr + lr.size
best_offset = new_lr.set_address(best_offset)
- self.memory_required = max(self.memory_required, best_offset + size)
+ self.memory_required = max(self.memory_required, best_offset + aligned_size)
self.current_allocs.append((best_offset, new_lr))
self.current_allocs = list(sorted(self.current_allocs))
def dealloc(self, lr_to_dealloc):
self.current_allocs = [(start_addr, lr) for start_addr, lr in self.current_allocs if lr != lr_to_dealloc]
- def allocate_live_ranges(self, verbose_allocation):
+ def allocate_live_ranges(self, verbose_allocation, alignment):
lrs = set()
for lr in self.live_ranges.ranges.values():
lrs.add((lr.start_time, lr.end_time, lr))
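
The core of the change in alloc() is that the best-fit gap search now rounds the live range's size up to its alignment before testing whether it fits between two existing allocations, so a tensor can no longer be squeezed into a hole that only fits its unaligned size. Below is a minimal standalone sketch of that search; round_up and find_best_offset are illustrative helpers, not Vela's API, and the alignment that Vela's set_address() applies is folded directly into the returned offset:

    def round_up(value, alignment):
        # Round value up to the next multiple of alignment.
        return ((value + alignment - 1) // alignment) * alignment

    def find_best_offset(current_allocs, size, alignment, current_top):
        # current_allocs: list of (start_addr, size), sorted by start address.
        # Default to the first aligned offset past the highest allocation.
        best_offset = round_up(current_top, alignment)
        best_fit = (1 << 64) - 1
        aligned_size = round_up(size, alignment)  # the fix: reserve the aligned size
        current_offset = 0
        for start_addr, alloc_size in current_allocs:
            aligned_offset = round_up(current_offset, alignment)
            # Only take a gap if the aligned allocation fits before the next
            # block, preferring the tightest such gap (best fit).
            if aligned_offset + aligned_size <= start_addr and start_addr - current_offset < best_fit:
                best_offset = aligned_offset
                best_fit = start_addr - current_offset
            current_offset = start_addr + alloc_size
        return best_offset

    # e.g. with one block at [0, 24) and 16-byte alignment, a 16-byte tensor
    # is not placed at offset 24; it goes to the aligned offset 32.
    assert find_best_offset([(0, 24)], 16, 16, 24) == 32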
@@ -68,25 +70,34 @@ class GreedyAllocator:
self.alloc(new_lr)
- assert self.verify_allocation()
+ self.verify_allocation(alignment)
return self.memory_required
- def verify_allocation(self):
+ def verify_allocation(self, alignment):
lrs = list(self.live_ranges.ranges.values())
for n in lrs:
+ for tens in n.tensors:
+ if not all(op and op.run_on_npu for op in tens.ops + tens.consumer_list):
+ # This is a CPU tensor, verify alignment
+ if tens.address % alignment != 0:
+ raise AllocationError("Tensor {} not aligned to {} bytes".format(tens.name, alignment))
+
for m in lrs:
if n != m and n.overlaps_ranges(m):
overlap, tens_n, tens_m = n.overlaps_address(m)
if overlap and not (tens_n.equivalent(tens_m) and tens_n.address == tens_m.address):
- print("Solution failed, overlapping buffer!")
- print(tens_n.address, tens_n.address + n.size, n.name)
- print(tens_m.address, tens_m.address + m.size, m.name)
- print()
- return False
-
- return True
-
-
-def allocate_live_ranges(nng, arch, live_ranges, mem_area, verbose_allocation=False):
+ raise AllocationError(
+ "Overlapping buffers: {}: {} -> {} and {}: {} -> {}".format(
+ n.name,
+ tens_n.address,
+ tens_n.address + n.size,
+ m.name,
+ tens_m.address,
+ tens_m.address + m.size,
+ )
+ )
+
+
+def allocate_live_ranges(nng, arch, live_ranges, mem_area, alignment, verbose_allocation=False):
g = GreedyAllocator(nng, arch, live_ranges, mem_area)
- return g.allocate_live_ranges(verbose_allocation)
+ return g.allocate_live_ranges(verbose_allocation, alignment)
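
With this change verify_allocation() raises an AllocationError instead of printing and returning False, and allocate_live_ranges() takes the required alignment as an explicit argument. A hedged sketch of how a caller might now handle a failed allocation; the 16-byte alignment and the surrounding variables are assumptions for illustration:

    from ethosu.vela.errors import AllocationError

    try:
        # nng, arch, live_ranges and mem_area come from earlier compilation
        # stages; 16 is an assumed CPU tensor alignment.
        memory_required = allocate_live_ranges(
            nng, arch, live_ranges, mem_area, alignment=16
        )
    except AllocationError as e:
        # Raised for a misaligned CPU tensor or overlapping buffers, where
        # the old code printed a message and returned False.
        print("Tensor allocation failed:", e)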