From a19b4671dd0594181a2789930cc98bf5dc41ded4 Mon Sep 17 00:00:00 2001
From: Louis Verhaard <louis.verhaard@arm.com>
Date: Mon, 28 Feb 2022 15:12:57 +0100
Subject: MLBEDSW-6249: HillClimb improved stuck avoidance

Added a mechanism that reduces the risk for getting stuck
if the current best allocation cannot be improved by only
swapping 2 indices.

Change-Id: Ife379757752f0c1ed54af7bd826e0a9390d54267
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
---
 ethosu/vela/hillclimb_allocation.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/ethosu/vela/hillclimb_allocation.py b/ethosu/vela/hillclimb_allocation.py
index 2271fe9c..6d3003c5 100644
--- a/ethosu/vela/hillclimb_allocation.py
+++ b/ethosu/vela/hillclimb_allocation.py
@@ -94,6 +94,8 @@ class HillClimbAllocator:
     NOT_ALLOCATED = -1
     # Used for live ranges allocated at address 0
     NO_PREDECESSOR = -1
+    # Special handling if best solution has not improved during this many iterations
+    MAX_ITERATIONS_STUCK = 50
 
     def __init__(self, live_ranges: List[LiveRange]):
         # Contains the live ranges
@@ -190,7 +192,7 @@ class HillClimbAllocator:
                 turn_set.add(turn)
                 turn_list.append(turn)
 
-    def attempt_bottleneck_fix(self, indices: List[int]):
+    def attempt_bottleneck_fix(self, indices: List[int], iterations_stuck):
         """
         Finds the "bottleneck", the live range with highest end address, and reorders the indices
         such that a next allocation might lower the memory usage.
@@ -255,24 +257,41 @@ class HillClimbAllocator:
             ix2 = turn_list[-1]
         # Swap indices
         indices[ix1], indices[ix2] = indices[ix2], indices[ix1]
+        if iterations_stuck > HillClimbAllocator.MAX_ITERATIONS_STUCK:
+            # The best allocation has not improved for a while, maybe improvement is not possible
+            # by single-swapping indices; add more neighbour live ranges and swap up to two more indices.
+            # Adding more neighbours can sometimes resolve the situation where the current bottleneck
+            # is resolved, but always results in a higher bottleneck at a nearby live range.
+            # Magic number is based on tuning
+            for turn in non_nb_turn_list:
+                for lr in self.lrs[indices[turn]].neighbours:
+                    if lr.turn not in turn_set:
+                        turn_set.add(lr.turn)
+                        turn_list.append(lr.turn)
+            ix1 = turn_list[random.randint(0, len(turn_list) - 1)]
+            ix2 = turn_list[random.randint(0, len(turn_list) - 1)]
+            indices[ix1], indices[ix2] = indices[ix2], indices[ix1]
 
     def search(self, indices: List[int], iterations: int):
         """
         Search for a solution, using the given indices as initial solution.
         """
         best_indices = indices[:]
-        for _ in range(iterations):
+        last_improvement_iteration = 0
+        for i in range(iterations):
             # Reorder the indices
-            self.attempt_bottleneck_fix(indices)
+            self.attempt_bottleneck_fix(indices, i - last_improvement_iteration)
             # Allocate the reordered indices and check if it gave an improvement
             new_size = self.allocate_indices(indices)
             if new_size <= self.best_size:
                 # The new allocation produced a new best result remember it
+                if new_size < self.best_size:
+                    last_improvement_iteration = i
                 self.best_size = new_size
                 best_indices = indices[:]
                 self.allocated_addresses = [lr.address for lr in self.lrs]
                 if self.best_size <= self.min_required_size:
-                    # Target reached stop
+                    # Target reached; stop
                     return
             else:
                 # The new allocation produced worse result undo the change
-- 
cgit v1.2.1