aco/ra: change heuristic to first fit
Totals from 73175 (92.17% of 79395) affected shaders: (GFX11)
MaxWaves: 2217690 -> 2217930 (+0.01%); split: +0.02%, -0.01%
Instrs: 44780731 -> 44784895 (+0.01%); split: -0.14%, +0.15%
CodeSize: 233238960 -> 233255604 (+0.01%); split: -0.11%, +0.12%
VGPRs: 3009116 -> 3007684 (-0.05%); split: -0.29%, +0.24%
Latency: 304320163 -> 304286592 (-0.01%); split: -0.31%, +0.30%
InvThroughput: 49121992 -> 49145025 (+0.05%); split: -0.20%, +0.25%
VClause: 872566 -> 873242 (+0.08%); split: -0.25%, +0.33%
SClause: 1359666 -> 1361640 (+0.15%); split: -0.11%, +0.26%
Copies: 2879649 -> 2881646 (+0.07%); split: -1.13%, +1.20%
Branches: 887102 -> 887093 (-0.00%); split: -0.01%, +0.01%
VALU: 25128240 -> 25128572 (+0.00%); split: -0.12%, +0.12%
SALU: 4328852 -> 4330559 (+0.04%); split: -0.07%, +0.11%
VOPD: 8861 -> 8992 (+1.48%); split: +2.63%, -1.15%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29235>
This commit is contained in:

committed by
Marge Bot

parent
d76fc005b6
commit
197943ae27
@@ -913,69 +913,9 @@ get_reg_simple(ra_ctx& ctx, const RegisterFile& reg_file, DefInfo info)
|
||||
auto is_free = [&](PhysReg reg_index)
|
||||
{ return reg_file[reg_index] == 0 && !ctx.war_hint[reg_index]; };
|
||||
|
||||
if (stride == 1) {
|
||||
/* best fit algorithm: find the smallest gap to fit in the variable */
|
||||
PhysRegInterval best_gap{PhysReg{0}, UINT_MAX};
|
||||
const unsigned max_gpr =
|
||||
(rc.type() == RegType::vgpr) ? (256 + ctx.max_used_vgpr) : ctx.max_used_sgpr;
|
||||
|
||||
PhysRegIterator reg_it = bounds.begin();
|
||||
const PhysRegIterator end_it =
|
||||
std::min(bounds.end(), std::max(PhysRegIterator{PhysReg{max_gpr + 1}}, reg_it));
|
||||
while (reg_it != bounds.end()) {
|
||||
/* Find the next chunk of available register slots */
|
||||
reg_it = std::find_if(reg_it, end_it, is_free);
|
||||
auto next_nonfree_it = std::find_if_not(reg_it, end_it, is_free);
|
||||
if (reg_it == bounds.end()) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (next_nonfree_it == end_it) {
|
||||
/* All registers past max_used_gpr are free */
|
||||
next_nonfree_it = bounds.end();
|
||||
}
|
||||
|
||||
PhysRegInterval gap = PhysRegInterval::from_until(*reg_it, *next_nonfree_it);
|
||||
|
||||
/* early return on exact matches */
|
||||
if (size == gap.size) {
|
||||
adjust_max_used_regs(ctx, rc, gap.lo());
|
||||
return gap.lo();
|
||||
}
|
||||
|
||||
/* check if it fits and the gap size is smaller */
|
||||
if (size < gap.size && gap.size < best_gap.size) {
|
||||
best_gap = gap;
|
||||
}
|
||||
|
||||
/* Move past the processed chunk */
|
||||
reg_it = next_nonfree_it;
|
||||
}
|
||||
|
||||
if (best_gap.size == UINT_MAX)
|
||||
return {};
|
||||
|
||||
/* find best position within gap by leaving a good stride for other variables*/
|
||||
unsigned buffer = best_gap.size - size;
|
||||
if (buffer > 1) {
|
||||
if (((best_gap.lo() + size) % 8 != 0 && (best_gap.lo() + buffer) % 8 == 0) ||
|
||||
((best_gap.lo() + size) % 4 != 0 && (best_gap.lo() + buffer) % 4 == 0) ||
|
||||
((best_gap.lo() + size) % 2 != 0 && (best_gap.lo() + buffer) % 2 == 0))
|
||||
best_gap = {PhysReg{best_gap.lo() + buffer}, best_gap.size - buffer};
|
||||
}
|
||||
|
||||
adjust_max_used_regs(ctx, rc, best_gap.lo());
|
||||
return best_gap.lo();
|
||||
}
|
||||
|
||||
for (PhysRegInterval reg_win = {bounds.lo(), size}; reg_win.hi() <= bounds.hi();
|
||||
reg_win += stride) {
|
||||
if (reg_file[reg_win.lo()] != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_valid = std::all_of(std::next(reg_win.begin()), reg_win.end(), is_free);
|
||||
if (is_valid) {
|
||||
if (std::all_of(reg_win.begin(), reg_win.end(), is_free)) {
|
||||
adjust_max_used_regs(ctx, rc, reg_win.lo());
|
||||
return reg_win.lo();
|
||||
}
|
||||
|
Reference in New Issue
Block a user