/* * Copyright (c) 2020 Arm Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the License); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an AS IS BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Description: * Implementation of the search-based allocator. */ #include #include #include #include #include "search_allocator.h" SearchAllocator::SearchAllocator(const std::vector &live_ranges, uint32_t size_limit) { lrs = live_ranges; uint32_t max_end_time = 0; for (size_t i = 0; i < lrs.size(); ++i) { auto &lr = lrs[i]; lr.id = static_cast(i); max_end_time = std::max(max_end_time, lr.end_time); } lrs_at_time.resize(max_end_time + 1); size_at_time.resize(max_end_time + 1); neighbours.resize(lrs.size()); // Calculate which live ranges are active at every timestamp for (size_t t = 0; t <= max_end_time; ++t) { lrs_at_time[t].clear(); } for (auto &lr : lrs) { for (auto t = lr.start_time; t <= lr.end_time; ++t) { lrs_at_time[t].push_back(&lr); } } min_required_size = 0; for (size_t t = 0; t <= max_end_time; ++t) { // Calculate minimum needed size at each timestamp uint32_t size_at_t = 0; for (auto &lr : lrs_at_time[t]) { size_at_t += lr->size; } size_at_time[t] = size_at_t; min_required_size = std::max(size_at_t, min_required_size); // Calculate all neighbours for (size_t i = 0; i < lrs_at_time[t].size(); ++i) { auto lr1 = lrs_at_time[t][i]; auto &nb1 = neighbours[lr1->id]; for (size_t j = i + 1; j < lrs_at_time[t].size(); ++j) { auto lr2 = lrs_at_time[t][j]; if (find(nb1.begin(), nb1.end(), lr2) == nb1.end()) { nb1.push_back(lr2); neighbours[lr2->id].push_back(lr1); } } } } target_size = std::max(min_required_size, available_size); // Calculate the urgency of each live range lr_urgency.resize(lrs.size()); for (size_t i = 0; i < lrs.size(); ++i) { auto &lr = lrs[i]; uint32_t urgency = 0; for (size_t t = lr.start_time; t <= lr.end_time; ++t) { urgency = std::max(size_at_time[t], urgency); } lr_urgency[i] = urgency; } best_size = UINT32_MAX; } uint32_t SearchAllocator::allocate(std::vector &output) { output.clear(); nr_iterations = 0; std::vector indices; // Initial solution, using a heuristic allocator for (size_t i = 0; i < lrs.size(); ++i) { indices.push_back(i); } sort_indices_on_prio(indices); // Allocate the initial solution best_size = UINT32_MAX; best_size = allocate_indices(indices); if (best_size <= target_size) { // The heuristic allocation returned an optimal solution. // No need to search. } else { // Try to improve the heuristic allocation search(indices, best_size, MAX_ITERATIONS); } output.clear(); for (auto &lr : lrs) { output.push_back(lr.address); } return best_size; } void SearchAllocator::allocate_lr(LiveRange &lr) const { uint32_t address = 0; int predecessor = NO_PREDECESSOR; bool fits = false; while (!fits) { fits = true; // Find neighbours that overlap with address for (auto lr2_p : neighbours[lr.id]) { if (lr2_p->address == NOT_ALLOCATED || lr2_p->end_address <= address) { continue; } if (lr2_p->overlaps(address, lr.size)) { // Overlap found; increase address fits = false; address = lr2_p->end_address; predecessor = lr2_p->id; } } } lr.address = address; lr.end_address = address + lr.size; lr.predecessor = predecessor; } uint32_t SearchAllocator::allocate_indices(const std::vector &indices) { ++nr_iterations; std::vector count(indices.size()); for (auto &lr : lrs) { lr.address = NOT_ALLOCATED; } uint32_t size = 0; for (size_t turn = 0; size <= best_size && turn < indices.size(); ++turn) { auto &lr = lrs[indices[turn]]; allocate_lr(lr); lr.turn = turn; size = std::max(size, lr.end_address); } return size; } void SearchAllocator::sort_indices_on_prio(std::vector &indices) const { std::sort(indices.begin(), indices.end(), [this] (size_t const& a, size_t const& b) { if (lr_urgency[a] != lr_urgency[b]) { return lr_urgency[a] > lr_urgency[b]; } auto &lr1 = lrs[a]; auto &lr2 = lrs[b]; auto duration1 = lr1.end_time - lr1.start_time; auto duration2 = lr2.end_time - lr2.start_time; if (duration1 != duration2) { return duration1 > duration2; } if (lr1.start_time != lr2.start_time) { return lr1.start_time < lr2.start_time; } if (lr1.size != lr2.size) { return lr1.size > lr2.size; } return lr1.id < lr2.id; }); } void SearchAllocator::add_predecessor_turns(std::set &turns, const LiveRange &lr) const { turns.insert(lr.turn); int id = lr.id; while (lrs[id].predecessor != NO_PREDECESSOR) { id = lrs[id].predecessor; turns.insert(lrs[id].turn); } } void SearchAllocator::attempt_bottleneck_fix(std::vector &indices) { // Find the bottleneck LiveRange *max_lr = &lrs[0]; for (auto &lr : lrs) { if (lr.end_address > max_lr->end_address) { max_lr = &lr; } } // Find all live ranges that affected the placement of the bottleneck live range. // This consists of two types of live ranges: // - direct neighbours of the bottleneck live range // - direct and indirect predecessors of these neighbours + bottleneck // The turns at which these live ranges were allocated are put in the turns vector. std::set turns; add_predecessor_turns(turns, *max_lr); for (auto lr_p : neighbours[max_lr->id]) { add_predecessor_turns(turns, *lr_p); } // Non-direct neighbours that interfere with the allocation of the bottleneck are the // immediate cause for gaps in the allocation, and are selected with higher probability. std::vector turn_list; std::vector non_nb_turn_list; for (auto turn : turns) { turn_list.push_back(turn); auto &lr = lrs[indices[turn]]; if (!max_lr->is_neighbour(lr)) { non_nb_turn_list.push_back(turn); } } size_t ix1; if (rng() % 100 < 30 && !non_nb_turn_list.empty()) { // Pick a live range from the "non-neighbour list" ix1 = non_nb_turn_list[rng() % non_nb_turn_list.size()]; } else { // Pick any affecting live range. ix1 = turn_list[rng() % turn_list.size()]; } size_t ix2 = turn_list[rng() % turn_list.size() - 1]; if (ix1 == ix2) { ix2 = turn_list[turn_list.size() - 1]; } // Swap indices std::swap(indices[ix1], indices[ix2]); } void SearchAllocator::search(std::vector &indices, uint32_t initial_size, int iterations) { std::vector best_indices = indices; std::vector best_lrs = lrs; for (int i = 0; i < iterations; ++i) { // Reorder the indices attempt_bottleneck_fix(indices); // Allocate the reordered indices and check if it gave an improvement auto new_size = allocate_indices(indices); if (new_size <= best_size) { // The new allocation produced a new best result; remember it best_size = new_size; best_indices = indices; best_lrs = lrs; if (best_size <= target_size) { // Target reached; stop return; } } else { // The new allocation produced worse result; undo the change indices = best_indices; lrs = best_lrs; } } lrs = best_lrs; } uint32_t allocate(const std::vector &input, int available_size, std::vector &output) { // Convert input to vector of live ranges std::vector lrs; for (size_t i = 0; i < input.size(); i += 3) { LiveRange lr; lr.start_time = input[i]; lr.end_time = input[i+1]; lr.size = input[i+2]; lrs.push_back(lr); } SearchAllocator allocator(lrs, available_size); return allocator.allocate(output); }