aco: rewrite graph coloring in spiller
I don't think this is much of an optimization in the typical case, but for very complex shaders this should work much better. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5805>
This commit is contained in:
@@ -1327,9 +1327,110 @@ Temp load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
|
||||
Operand(rsrc_conf));
|
||||
}
|
||||
|
||||
void add_interferences(spill_ctx& ctx, std::vector<bool>& is_assigned,
|
||||
std::vector<uint32_t>& slots, std::vector<bool>& slots_used,
|
||||
unsigned id)
|
||||
{
|
||||
RegType type = ctx.interferences[id].first.type();
|
||||
|
||||
for (unsigned other : ctx.interferences[id].second) {
|
||||
if (!is_assigned[other])
|
||||
continue;
|
||||
|
||||
RegClass other_rc = ctx.interferences[other].first;
|
||||
if (other_rc.type() == type) {
|
||||
unsigned slot = slots[other];
|
||||
std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned find_available_slot(std::vector<bool>& used, unsigned wave_size,
|
||||
unsigned size, bool is_sgpr, unsigned *num_slots)
|
||||
{
|
||||
unsigned wave_size_minus_one = wave_size - 1;
|
||||
unsigned slot = 0;
|
||||
|
||||
while (true) {
|
||||
bool available = true;
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
if (slot + i < used.size() && used[slot + i]) {
|
||||
available = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!available) {
|
||||
slot++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_sgpr && ((slot & wave_size_minus_one) > wave_size - size)) {
|
||||
slot = align(slot, wave_size);
|
||||
continue;
|
||||
}
|
||||
|
||||
std::fill(used.begin(), used.end(), false);
|
||||
|
||||
if (slot + size > used.size())
|
||||
used.resize(slot + size);
|
||||
|
||||
return slot;
|
||||
}
|
||||
}
|
||||
|
||||
void assign_spill_slots_helper(spill_ctx& ctx, RegType type,
|
||||
std::vector<bool>& is_assigned,
|
||||
std::vector<uint32_t>& slots,
|
||||
unsigned *num_slots)
|
||||
{
|
||||
std::vector<bool> slots_used(*num_slots);
|
||||
|
||||
/* assign slots for ids with affinities first */
|
||||
for (std::vector<uint32_t>& vec : ctx.affinities) {
|
||||
if (ctx.interferences[vec[0]].first.type() != type)
|
||||
continue;
|
||||
|
||||
for (unsigned id : vec) {
|
||||
if (!ctx.is_reloaded[id])
|
||||
continue;
|
||||
|
||||
add_interferences(ctx, is_assigned, slots, slots_used, id);
|
||||
}
|
||||
|
||||
unsigned slot = find_available_slot(slots_used, ctx.wave_size,
|
||||
ctx.interferences[vec[0]].first.size(),
|
||||
type == RegType::sgpr, num_slots);
|
||||
|
||||
for (unsigned id : vec) {
|
||||
assert(!is_assigned[id]);
|
||||
|
||||
if (ctx.is_reloaded[id]) {
|
||||
slots[id] = slot;
|
||||
is_assigned[id] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* assign slots for ids without affinities */
|
||||
for (unsigned id = 0; id < ctx.interferences.size(); id++) {
|
||||
if (is_assigned[id] || !ctx.is_reloaded[id] || ctx.interferences[id].first.type() != type)
|
||||
continue;
|
||||
|
||||
add_interferences(ctx, is_assigned, slots, slots_used, id);
|
||||
|
||||
unsigned slot = find_available_slot(slots_used, ctx.wave_size,
|
||||
ctx.interferences[id].first.size(),
|
||||
type == RegType::sgpr, num_slots);
|
||||
|
||||
slots[id] = slot;
|
||||
is_assigned[id] = true;
|
||||
}
|
||||
|
||||
*num_slots = slots_used.size();
|
||||
}
|
||||
|
||||
void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
std::map<uint32_t, uint32_t> sgpr_slot;
|
||||
std::map<uint32_t, uint32_t> vgpr_slot;
|
||||
std::vector<uint32_t> slots(ctx.interferences.size());
|
||||
std::vector<bool> is_assigned(ctx.interferences.size());
|
||||
|
||||
/* first, handle affinities: just merge all interferences into both spill ids */
|
||||
@@ -1337,13 +1438,6 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
for (unsigned i = 0; i < vec.size(); i++) {
|
||||
for (unsigned j = i + 1; j < vec.size(); j++) {
|
||||
assert(vec[i] != vec[j]);
|
||||
for (uint32_t id : ctx.interferences[vec[i]].second)
|
||||
ctx.interferences[id].second.insert(vec[j]);
|
||||
for (uint32_t id : ctx.interferences[vec[j]].second)
|
||||
ctx.interferences[id].second.insert(vec[i]);
|
||||
ctx.interferences[vec[i]].second.insert(ctx.interferences[vec[j]].second.begin(), ctx.interferences[vec[j]].second.end());
|
||||
ctx.interferences[vec[j]].second.insert(ctx.interferences[vec[i]].second.begin(), ctx.interferences[vec[i]].second.end());
|
||||
|
||||
bool reloaded = ctx.is_reloaded[vec[i]] || ctx.is_reloaded[vec[j]];
|
||||
ctx.is_reloaded[vec[i]] = reloaded;
|
||||
ctx.is_reloaded[vec[j]] = reloaded;
|
||||
@@ -1355,117 +1449,9 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
assert(i != id);
|
||||
|
||||
/* for each spill slot, assign as many spill ids as possible */
|
||||
std::vector<std::set<uint32_t>> spill_slot_interferences;
|
||||
unsigned slot_idx = 0;
|
||||
bool done = false;
|
||||
|
||||
/* assign sgpr spill slots */
|
||||
while (!done) {
|
||||
done = true;
|
||||
for (unsigned id = 0; id < ctx.interferences.size(); id++) {
|
||||
if (is_assigned[id] || !ctx.is_reloaded[id])
|
||||
continue;
|
||||
if (ctx.interferences[id].first.type() != RegType::sgpr)
|
||||
continue;
|
||||
|
||||
/* check interferences */
|
||||
bool interferes = false;
|
||||
for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) {
|
||||
if (i == spill_slot_interferences.size())
|
||||
spill_slot_interferences.emplace_back(std::set<uint32_t>());
|
||||
if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end() || i / ctx.wave_size != slot_idx / ctx.wave_size) {
|
||||
interferes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (interferes) {
|
||||
done = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we found a spill id which can be assigned to current spill slot */
|
||||
sgpr_slot[id] = slot_idx;
|
||||
is_assigned[id] = true;
|
||||
for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++)
|
||||
spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end());
|
||||
|
||||
/* add all affinities: there are no additional interferences */
|
||||
for (std::vector<uint32_t>& vec : ctx.affinities) {
|
||||
bool found_affinity = false;
|
||||
for (uint32_t entry : vec) {
|
||||
if (entry == id) {
|
||||
found_affinity = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found_affinity)
|
||||
continue;
|
||||
for (uint32_t entry : vec) {
|
||||
sgpr_slot[entry] = slot_idx;
|
||||
is_assigned[entry] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
slot_idx++;
|
||||
}
|
||||
|
||||
unsigned sgpr_spill_slots = spill_slot_interferences.size();
|
||||
spill_slot_interferences.clear();
|
||||
slot_idx = 0;
|
||||
done = false;
|
||||
|
||||
/* assign vgpr spill slots */
|
||||
while (!done) {
|
||||
done = true;
|
||||
for (unsigned id = 0; id < ctx.interferences.size(); id++) {
|
||||
if (is_assigned[id] || !ctx.is_reloaded[id])
|
||||
continue;
|
||||
if (ctx.interferences[id].first.type() != RegType::vgpr)
|
||||
continue;
|
||||
|
||||
/* check interferences */
|
||||
bool interferes = false;
|
||||
for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) {
|
||||
if (i == spill_slot_interferences.size())
|
||||
spill_slot_interferences.emplace_back(std::set<uint32_t>());
|
||||
/* check for interference and ensure that vector regs are stored next to each other */
|
||||
if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end()) {
|
||||
interferes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (interferes) {
|
||||
done = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we found a spill id which can be assigned to current spill slot */
|
||||
vgpr_slot[id] = slot_idx;
|
||||
is_assigned[id] = true;
|
||||
for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++)
|
||||
spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end());
|
||||
|
||||
/* add all affinities: there are no additional interferences */
|
||||
for (std::vector<uint32_t>& vec : ctx.affinities) {
|
||||
bool found_affinity = false;
|
||||
for (uint32_t entry : vec) {
|
||||
if (entry == id) {
|
||||
found_affinity = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found_affinity)
|
||||
continue;
|
||||
for (uint32_t entry : vec) {
|
||||
vgpr_slot[entry] = slot_idx;
|
||||
is_assigned[entry] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
slot_idx++;
|
||||
}
|
||||
|
||||
unsigned vgpr_spill_slots = spill_slot_interferences.size();
|
||||
unsigned sgpr_spill_slots = 0, vgpr_spill_slots = 0;
|
||||
assign_spill_slots_helper(ctx, RegType::sgpr, is_assigned, slots, &sgpr_spill_slots);
|
||||
assign_spill_slots_helper(ctx, RegType::vgpr, is_assigned, slots, &vgpr_spill_slots);
|
||||
|
||||
for (unsigned id = 0; id < is_assigned.size(); id++)
|
||||
assert(is_assigned[id] || !ctx.is_reloaded[id]);
|
||||
@@ -1478,10 +1464,7 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
continue;
|
||||
assert(ctx.is_reloaded[vec[i]] == ctx.is_reloaded[vec[j]]);
|
||||
assert(ctx.interferences[vec[i]].first.type() == ctx.interferences[vec[j]].first.type());
|
||||
if (ctx.interferences[vec[i]].first.type() == RegType::sgpr)
|
||||
assert(sgpr_slot[vec[i]] == sgpr_slot[vec[j]]);
|
||||
else
|
||||
assert(vgpr_slot[vec[i]] == vgpr_slot[vec[j]]);
|
||||
assert(slots[vec[i]] == slots[vec[j]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1531,8 +1514,8 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
bool can_destroy = true;
|
||||
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[block.linear_preds[0]]) {
|
||||
|
||||
if (sgpr_slot.find(pair.second) != sgpr_slot.end() &&
|
||||
sgpr_slot[pair.second] / ctx.wave_size == i) {
|
||||
if (ctx.interferences[pair.second].first.type() == RegType::sgpr &&
|
||||
slots[pair.second] / ctx.wave_size == i) {
|
||||
can_destroy = false;
|
||||
break;
|
||||
}
|
||||
@@ -1553,10 +1536,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
|
||||
if (!ctx.is_reloaded[spill_id]) {
|
||||
/* never reloaded, so don't spill */
|
||||
} else if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
|
||||
} else if (!is_assigned[spill_id]) {
|
||||
unreachable("No spill slot assigned for spill id");
|
||||
} else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
|
||||
/* spill vgpr */
|
||||
ctx.program->config->spilled_vgprs += (*it)->operands[0].size();
|
||||
uint32_t spill_slot = vgpr_slot[spill_id];
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096;
|
||||
unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||
|
||||
@@ -1586,10 +1571,10 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
} else {
|
||||
bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false);
|
||||
}
|
||||
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
||||
} else {
|
||||
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
|
||||
|
||||
uint32_t spill_slot = sgpr_slot[spill_id];
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
|
||||
/* check if the linear vgpr already exists */
|
||||
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
|
||||
@@ -1615,17 +1600,17 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
spill->operands[1] = Operand(spill_slot % ctx.wave_size);
|
||||
spill->operands[2] = (*it)->operands[0];
|
||||
instructions.emplace_back(aco_ptr<Instruction>(spill));
|
||||
} else {
|
||||
unreachable("No spill slot assigned for spill id");
|
||||
}
|
||||
|
||||
} else if ((*it)->opcode == aco_opcode::p_reload) {
|
||||
uint32_t spill_id = (*it)->operands[0].constantValue();
|
||||
assert(ctx.is_reloaded[spill_id]);
|
||||
|
||||
if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
|
||||
if (!is_assigned[spill_id]) {
|
||||
unreachable("No spill slot assigned for spill id");
|
||||
} else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
|
||||
/* reload vgpr */
|
||||
uint32_t spill_slot = vgpr_slot[spill_id];
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096;
|
||||
unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||
|
||||
@@ -1654,8 +1639,8 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
} else {
|
||||
bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false);
|
||||
}
|
||||
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
||||
uint32_t spill_slot = sgpr_slot[spill_id];
|
||||
} else {
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
reload_in_loop[spill_slot / ctx.wave_size] = block.loop_nest_depth > 0;
|
||||
|
||||
/* check if the linear vgpr already exists */
|
||||
@@ -1682,8 +1667,6 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
reload->operands[1] = Operand(spill_slot % ctx.wave_size);
|
||||
reload->definitions[0] = (*it)->definitions[0];
|
||||
instructions.emplace_back(aco_ptr<Instruction>(reload));
|
||||
} else {
|
||||
unreachable("No spill slot assigned for spill id");
|
||||
}
|
||||
} else if (!ctx.remat_used.count(it->get()) || ctx.remat_used[it->get()]) {
|
||||
instructions.emplace_back(std::move(*it));
|
||||
|
Reference in New Issue
Block a user