aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/greedy_allocation.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/greedy_allocation.py')
-rw-r--r--ethosu/vela/greedy_allocation.py95
1 files changed, 95 insertions, 0 deletions
diff --git a/ethosu/vela/greedy_allocation.py b/ethosu/vela/greedy_allocation.py
new file mode 100644
index 00000000..6b3d2c1e
--- /dev/null
+++ b/ethosu/vela/greedy_allocation.py
@@ -0,0 +1,95 @@
+# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Description:
+# Allocate tensor addresses using a greedy algorithm.
+
+from . import numeric_util
+
+
+class GreedyAllocator:
+ def __init__(self, nng, arch, live_ranges, mem_area):
+ self.nng = nng
+ self.arch = arch
+ self.mem_area = mem_area
+
+ self.live_ranges = live_ranges
+ self.memory_required = 0
+
+ self.current_allocs = []
+
+ def alloc(self, new_lr):
+ size = new_lr.size
+ current_top = 0
+ if self.current_allocs:
+ current_top = max(start_addr + lr.size for start_addr, lr in self.current_allocs)
+ best_offset = numeric_util.round_up(current_top, new_lr.get_alignment())
+ best_offset_fit = (1 << 64) - 1
+
+ current_offset = 0
+ for start_addr, lr in self.current_allocs:
+ aligned_current_offset = numeric_util.round_up(current_offset, new_lr.get_alignment())
+ if aligned_current_offset + size <= start_addr and start_addr - current_offset < best_offset_fit:
+ best_offset = current_offset
+ best_offset_fit = start_addr - current_offset
+
+ current_offset = start_addr + lr.size
+
+ self.memory_required = max(self.memory_required, best_offset + size)
+ new_lr.set_address(best_offset)
+ self.current_allocs.append((best_offset, new_lr))
+ self.current_allocs = list(sorted(self.current_allocs))
+
+ def dealloc(self, lr_to_dealloc):
+ self.current_allocs = [(start_addr, lr) for start_addr, lr in self.current_allocs if lr != lr_to_dealloc]
+
+ def allocate_live_ranges(self, verbose_allocation):
+ lrs = set()
+ for lr in self.live_ranges.ranges.values():
+ lrs.add((lr.start_time, lr.end_time, lr))
+
+ lrs = sorted(lrs)
+
+ for curr_time, _, new_lr in lrs:
+ for _, lr in list(self.current_allocs):
+ if lr.end_time < curr_time:
+ self.dealloc(lr)
+
+ self.alloc(new_lr)
+
+ assert self.verify_allocation()
+ return self.memory_required
+
+ def verify_allocation(self):
+ lrs = list(self.live_ranges.ranges.values())
+ for n in lrs:
+ for m in lrs:
+ if n != m and n.overlaps_ranges(m):
+ overlap, tens_n, tens_m = n.overlaps_address(m)
+ if overlap:
+ print("Solution failed, overlapping buffer!")
+ print(tens_n.address, tens_n.address + n.size, n.name)
+ print(tens_m.address, tens_m.address + m.size, m.name)
+ print()
+ return False
+
+ return True
+
+
+def allocate_live_ranges(nng, arch, live_ranges, mem_area, verbose_allocation=False):
+ g = GreedyAllocator(nng, arch, live_ranges, mem_area)
+ return g.allocate_live_ranges(verbose_allocation)