aco/ra: change heuristic to first fit

Totals from 73175 (92.17% of 79395) affected shaders: (GFX11)

MaxWaves: 2217690 -> 2217930 (+0.01%); split: +0.02%, -0.01%
Instrs: 44780731 -> 44784895 (+0.01%); split: -0.14%, +0.15%
CodeSize: 233238960 -> 233255604 (+0.01%); split: -0.11%, +0.12%
VGPRs: 3009116 -> 3007684 (-0.05%); split: -0.29%, +0.24%
Latency: 304320163 -> 304286592 (-0.01%); split: -0.31%, +0.30%
InvThroughput: 49121992 -> 49145025 (+0.05%); split: -0.20%, +0.25%
VClause: 872566 -> 873242 (+0.08%); split: -0.25%, +0.33%
SClause: 1359666 -> 1361640 (+0.15%); split: -0.11%, +0.26%
Copies: 2879649 -> 2881646 (+0.07%); split: -1.13%, +1.20%
Branches: 887102 -> 887093 (-0.00%); split: -0.01%, +0.01%
VALU: 25128240 -> 25128572 (+0.00%); split: -0.12%, +0.12%
SALU: 4328852 -> 4330559 (+0.04%); split: -0.07%, +0.11%
VOPD: 8861 -> 8992 (+1.48%); split: +2.63%, -1.15%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29235>
This commit is contained in:
Daniel Schürmann
2024-05-14 11:34:44 +02:00
committed by Marge Bot
parent d76fc005b6
commit 197943ae27

View File

@@ -913,69 +913,9 @@ get_reg_simple(ra_ctx& ctx, const RegisterFile& reg_file, DefInfo info)
auto is_free = [&](PhysReg reg_index)
{ return reg_file[reg_index] == 0 && !ctx.war_hint[reg_index]; };
if (stride == 1) {
/* best fit algorithm: find the smallest gap to fit in the variable */
PhysRegInterval best_gap{PhysReg{0}, UINT_MAX};
const unsigned max_gpr =
(rc.type() == RegType::vgpr) ? (256 + ctx.max_used_vgpr) : ctx.max_used_sgpr;
PhysRegIterator reg_it = bounds.begin();
const PhysRegIterator end_it =
std::min(bounds.end(), std::max(PhysRegIterator{PhysReg{max_gpr + 1}}, reg_it));
while (reg_it != bounds.end()) {
/* Find the next chunk of available register slots */
reg_it = std::find_if(reg_it, end_it, is_free);
auto next_nonfree_it = std::find_if_not(reg_it, end_it, is_free);
if (reg_it == bounds.end()) {
break;
}
if (next_nonfree_it == end_it) {
/* All registers past max_used_gpr are free */
next_nonfree_it = bounds.end();
}
PhysRegInterval gap = PhysRegInterval::from_until(*reg_it, *next_nonfree_it);
/* early return on exact matches */
if (size == gap.size) {
adjust_max_used_regs(ctx, rc, gap.lo());
return gap.lo();
}
/* check if it fits and the gap size is smaller */
if (size < gap.size && gap.size < best_gap.size) {
best_gap = gap;
}
/* Move past the processed chunk */
reg_it = next_nonfree_it;
}
if (best_gap.size == UINT_MAX)
return {};
/* find best position within gap by leaving a good stride for other variables*/
unsigned buffer = best_gap.size - size;
if (buffer > 1) {
if (((best_gap.lo() + size) % 8 != 0 && (best_gap.lo() + buffer) % 8 == 0) ||
((best_gap.lo() + size) % 4 != 0 && (best_gap.lo() + buffer) % 4 == 0) ||
((best_gap.lo() + size) % 2 != 0 && (best_gap.lo() + buffer) % 2 == 0))
best_gap = {PhysReg{best_gap.lo() + buffer}, best_gap.size - buffer};
}
adjust_max_used_regs(ctx, rc, best_gap.lo());
return best_gap.lo();
}
for (PhysRegInterval reg_win = {bounds.lo(), size}; reg_win.hi() <= bounds.hi();
reg_win += stride) {
if (reg_file[reg_win.lo()] != 0) {
continue;
}
bool is_valid = std::all_of(std::next(reg_win.begin()), reg_win.end(), is_free);
if (is_valid) {
if (std::all_of(reg_win.begin(), reg_win.end(), is_free)) {
adjust_max_used_regs(ctx, rc, reg_win.lo());
return reg_win.lo();
}